Compare commits

...

25 Commits

Author SHA1 Message Date
acute_interpreter_panic
964512de06 fixed pointer issue 2025-10-10 14:36:33 +02:00
acute_interpreter_panic
87e276de3d very slim skelleton for downloading 2025-10-10 14:07:03 +02:00
acute_interpreter_panic
04e8378f3f improved shell 2025-10-10 13:45:27 +02:00
acute_interpreter_panic
293de8b473 scraping audio url 2025-10-10 13:44:00 +02:00
acute_interpreter_panic
82c541653c added audio url to datatype 2025-10-10 13:25:29 +02:00
acute_interpreter_panic
1ff37517a5 test 2025-10-10 13:21:04 +02:00
acute_interpreter_panic
c7fceb277d added running of commands 2025-10-10 13:19:32 +02:00
acute_interpreter_panic
8896cb7d09 string 2025-10-10 13:12:47 +02:00
acute_interpreter_panic
c2794367b3 bracket stuff 2025-10-10 01:40:22 +02:00
acute_interpreter_panic
16318dac20 removing suffix 2025-10-10 01:27:35 +02:00
acute_interpreter_panic
655087fa42 performance 2025-10-10 01:11:03 +02:00
acute_interpreter_panic
19185f38a3 fixed fetching of discography 2025-10-10 00:40:58 +02:00
acute_interpreter_panic
81d29e4d8b deps 2025-10-10 00:08:45 +02:00
Hazel Noack
3565f14181 partial fetching of artist 2025-10-09 14:37:13 +02:00
Hazel Noack
1ae859b9b9 parsing artist url 2025-10-09 14:05:10 +02:00
Hazel Noack
4c0f19b257 adding helpfull debug info at hell 2025-10-09 13:50:27 +02:00
Hazel Noack
6a1baa9eed adding source types in musify scrapers 2025-10-09 13:48:05 +02:00
Hazel Noack
d74a324999 ability to reference source type 2025-10-09 13:40:40 +02:00
Hazel Noack
184ffdf2b8 removed prints 2025-10-09 13:36:42 +02:00
Hazel Noack
b5a78eb5b9 fixed wrong source type 2025-10-09 13:35:47 +02:00
Hazel Noack
07468e5b4f moved zero pad 2025-10-09 13:03:51 +02:00
Hazel Noack
fe244c7e68 merging unrelated objects into one 2025-10-09 12:53:25 +02:00
Hazel Noack
e79e364ff9 removed function fetch list 2025-10-09 12:35:00 +02:00
Hazel Noack
6bd957b09e removed redundant output 2025-10-09 12:23:48 +02:00
Hazel Noack
4eab2bb874 fetching from object not from source 2025-10-09 12:23:10 +02:00
11 changed files with 765 additions and 123 deletions

7
go.mod
View File

@@ -3,7 +3,8 @@ module gitea.elara.ws/Hazel/music-kraken
go 1.24.2
require (
github.com/PuerkitoBio/goquery v1.10.3 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
golang.org/x/net v0.45.0 // indirect
github.com/PuerkitoBio/goquery v1.10.3
golang.org/x/net v0.45.0
)
require github.com/andybalholm/cascadia v1.3.3 // indirect

View File

@@ -6,22 +6,16 @@ import (
"fmt"
"log"
"os"
"regexp"
"strconv"
"strings"
"gitea.elara.ws/Hazel/music-kraken/internal/common"
"gitea.elara.ws/Hazel/music-kraken/internal/common/color"
"gitea.elara.ws/Hazel/music-kraken/internal/data"
"gitea.elara.ws/Hazel/music-kraken/internal/plugin"
)
func zeroPad(num int, length int) string {
str := strconv.Itoa(num)
if len(str) >= length {
return str
}
return strings.Repeat("0", length-len(str)) + str
}
func printResults(musicObjects []data.MusicObject) {
if len(musicObjects) <= 0 {
return
@@ -30,7 +24,7 @@ func printResults(musicObjects []data.MusicObject) {
results := make([]string, len(musicObjects))
for i, m := range musicObjects {
results[i] = zeroPad(i, 2) + ": "
results[i] = common.ZeroPad(i, 2) + ": "
if a, ok := m.(data.Artist); ok {
results[i] += "#a " + a.Name
@@ -55,7 +49,10 @@ func printResults(musicObjects []data.MusicObject) {
sources := m.GetSources()
if len(sources) > 0 {
for _, source := range sources {
results[i] += "\n\t- " + source.Url
results[i] += "\n\t- " + source.SourceType.Name + " " + string(source.ObjectType) + " " + source.Url
if source.AudioUrl != "" {
results[i] += "\n\t " + source.AudioUrl
}
}
} else {
results[i] = color.StrikeThrough + results[i] + color.Reset
@@ -77,39 +74,63 @@ func (s musicObjectStore) currentMusicObjects() ([]data.MusicObject, error) {
return (s)[len(s)-1], nil
}
func interpretCommand(command string, store musicObjectStore) (musicObjectStore, error) {
var indexSelectionPattern = regexp.MustCompile(`^[\d ,]+$`)
func interpretCommand(command string, store musicObjectStore) (musicObjectStore, []error) {
// going back in history
if command == ".." {
if len(store) <= 1 {
return store, errors.New("can't go back")
return store, []error{errors.New("can't go back")}
}
return store[:len(store)-1], nil
return store[:len(store)-1], []error{}
}
forceDownload := false
if strings.HasPrefix(command, "d:") {
command, _ = strings.CutPrefix(command, "d:")
command = strings.TrimSpace(command)
forceDownload = true
}
// fetch special music object
if index, err := strconv.Atoi(command); err == nil {
if indexSelectionPattern.MatchString(command) {
currentMusicObjects, err := store.currentMusicObjects()
if err != nil {
return store, err
return store, []error{err}
}
if index >= len(currentMusicObjects) || index < 0 {
return store, errors.New(command + " is out of bounds [0 <= " + strconv.Itoa(index) + " <= " + strconv.Itoa(len(currentMusicObjects)-1) + "]")
var fetched data.MusicObject
for _, stringIndex := range strings.Split(command, ",") {
index, _ := strconv.Atoi(strings.TrimSpace(stringIndex))
if index >= len(currentMusicObjects) || index < 0 {
return store, []error{errors.New(command + " is out of bounds [0 <= " + strconv.Itoa(index) + " <= " + strconv.Itoa(len(currentMusicObjects)-1) + "]")}
}
current := currentMusicObjects[index]
newFetched, err := plugin.Fetch(current)
if err != nil {
return store, []error{err}
}
if fetched == nil {
fetched = newFetched
} else {
fetched = fetched.Merge(newFetched)
}
}
current := currentMusicObjects[index]
if len(current.GetSources()) <= 0 {
return store, errors.New("selected object has no sources to download")
if forceDownload {
return store, plugin.Download(fetched)
} else {
return append(store, fetched.Related()), []error{}
}
currentMusicObjects, err = plugin.FetchList(current.GetSources()[0])
if err != nil {
return store, err
}
return append(store, currentMusicObjects), nil
} else if forceDownload {
return store, []error{errors.New("can only download indices not " + command)}
}
// search in every other case
@@ -119,30 +140,48 @@ func interpretCommand(command string, store musicObjectStore) (musicObjectStore,
fmt.Println()
}
return append(store, currentMusicObjects), nil
return append(store, currentMusicObjects), []error{}
}
func Shell() {
func Shell(commandsList ...[]string) {
plugin.RegisterPlugin(&plugin.Musify{})
commands := []string{}
if len(commandsList) > 0 {
commands = commandsList[0]
}
fmt.Println("== MusicKraken Shell ==")
fmt.Println()
store := musicObjectStore{}
var err error = nil
for {
fmt.Print("> ")
var command string
reader := bufio.NewReader(os.Stdin)
command, err := reader.ReadString('\n')
if err != nil {
log.Fatal(err)
if len(commands) <= 0 {
fmt.Print("> ")
reader := bufio.NewReader(os.Stdin)
command, err = reader.ReadString('\n')
if err != nil {
log.Fatal(err)
}
} else {
command = commands[0]
commands = commands[1:]
fmt.Println("> " + command)
}
store, err = interpretCommand(strings.TrimSpace(command), store)
if err != nil {
var errList []error
store, errList = interpretCommand(strings.TrimSpace(command), store)
if len(errList) > 0 {
fmt.Println()
fmt.Println(color.Yellow + err.Error() + color.Reset)
for _, err := range errList {
fmt.Println(color.Yellow + err.Error() + color.Reset)
}
fmt.Println()
}

View File

@@ -1,6 +1,7 @@
package common
import (
"strconv"
"strings"
)
@@ -12,3 +13,112 @@ func Unify(s string) string {
}
return s
}
// ZeroPad formats num in base 10 and prefixes it with "0" characters until the
// result is at least length bytes long. A number that is already that wide is
// returned as is.
func ZeroPad(num int, length int) string {
	digits := strconv.Itoa(num)
	if missing := length - len(digits); missing > 0 {
		return strings.Repeat("0", missing) + digits
	}
	return digits
}
// IsNumeric reports whether num consists solely of the ASCII digits 0-9.
// The empty string is not numeric: it contains no digits at all, and callers
// use this to validate ids that must not be empty.
func IsNumeric(num string) bool {
	if num == "" {
		return false
	}
	for _, c := range num {
		if c < '0' || c > '9' {
			return false
		}
	}
	return true
}
var (
	// commonTitleSuffix lists lowercase trailers that are cut from the end of
	// a title.
	commonTitleSuffix = []string{"(official video)"}

	// forbiddenSubstringInBrackets lists lowercase markers. A bracket group
	// whose content contains one of them is removed from the title.
	forbiddenSubstringInBrackets = []string{"official", "video", "audio", "lyrics", "prod", "remix", "ft", "feat", "ft.", "feat."}
)

// openBrackets and closeBrackets are index-aligned: the byte at position i of
// openBrackets is closed by the byte at position i of closeBrackets.
const (
	openBrackets  = "(["
	closeBrackets = ")]"
)

// CleanSongTitle strips noise from a scraped title.
//
// It trims surrounding whitespace, cuts every suffix listed in
// commonTitleSuffix (compared case-insensitively), and removes each bracket
// group, brackets included, whose content contains one of
// forbiddenSubstringInBrackets (also compared case-insensitively). Bracket
// groups without such a marker, and opening brackets that are never closed,
// are left untouched. The result is trimmed again before it is returned.
//
// artistName is accepted for API compatibility but is not used yet.
// TODO: the Python implementation this was ported from additionally removed a
// leading artist name (followed by a separator such as "-", ":" or "|") from
// the title; that step has not been ported.
func CleanSongTitle(title string, artistName string) string {
	title = strings.TrimSpace(title)

	for _, suffix := range commonTitleSuffix {
		// Compare the tail of the original string instead of lowercasing the
		// whole title: lowercasing can change the byte length, which would
		// make the cut position wrong.
		if len(title) >= len(suffix) && strings.EqualFold(title[len(title)-len(suffix):], suffix) {
			title = strings.TrimSpace(title[:len(title)-len(suffix)])
		}
	}

	for i := 0; i < len(openBrackets) && i < len(closeBrackets); i++ {
		title = stripForbiddenBrackets(title, openBrackets[i], closeBrackets[i])
	}

	return strings.TrimSpace(title)
}

// stripForbiddenBrackets removes every opener...closer group from title whose
// content contains a forbidden marker, scanning from left to right.
func stripForbiddenBrackets(title string, opener, closer byte) string {
	start := 0
	for start < len(title) {
		rel := strings.IndexByte(title[start:], opener)
		if rel < 0 {
			break
		}
		openIdx := start + rel

		rel = strings.IndexByte(title[openIdx+1:], closer)
		if rel < 0 {
			// unbalanced bracket: nothing sensible to remove
			break
		}
		closeIdx := openIdx + 1 + rel

		if containsForbidden(strings.ToLower(title[openIdx+1 : closeIdx])) {
			title = title[:openIdx] + title[closeIdx+1:]
			// continue at the cut; the text before it holds no opener
			start = openIdx
			continue
		}
		start = closeIdx + 1
	}
	return title
}

// containsForbidden reports whether the already lowercased s contains any
// entry of forbiddenSubstringInBrackets.
func containsForbidden(s string) bool {
	for _, forbidden := range forbiddenSubstringInBrackets {
		if strings.Contains(s, forbidden) {
			return true
		}
	}
	return false
}

View File

@@ -35,3 +35,18 @@ func TestUnify(t *testing.T) {
t.Errorf(`Double whitespaces need to be removed`)
}
}
// TestZeroPad checks that ZeroPad pads to a width of three and leaves numbers
// that are already wider untouched.
func TestZeroPad(t *testing.T) {
	cases := map[int]string{
		0:    "000",
		5:    "005",
		1000: "1000",
		50:   "050",
	}

	for num, want := range cases {
		if got := ZeroPad(num, 3); got != want {
			// report input, result and expectation so a failure is actionable
			t.Errorf("ZeroPad(%d, 3) = %q, want %q", num, got, want)
		}
	}
}

View File

@@ -11,6 +11,7 @@ type MusicObject interface {
Compile() MusicObject
GetIndices() []string
Merge(other MusicObject) MusicObject
Related() []MusicObject
}
func dedupeMusicObjects[T MusicObject](inputMusicObjects []T) []T {
@@ -71,6 +72,20 @@ type Song struct {
Sources []Source
}
// Related lists the objects connected to the song: its artists first, then
// its album (only when the album has a name) and finally the song itself.
func (m Song) Related() []MusicObject {
	related := make([]MusicObject, 0, len(m.Artists)+2)
	for i := range m.Artists {
		related = append(related, m.Artists[i])
	}
	if m.Album.Name != "" {
		related = append(related, m.Album)
	}
	return append(related, m)
}
func (m Song) GetSources() []Source {
return m.Sources
}
@@ -127,6 +142,20 @@ type Album struct {
Sources []Source
}
// Related lists the objects connected to the album: its artists first, then
// the album itself, followed by its songs.
func (m Album) Related() []MusicObject {
	related := make([]MusicObject, 0, len(m.Artists)+1+len(m.Songs))
	for i := range m.Artists {
		related = append(related, m.Artists[i])
	}
	related = append(related, m)
	for i := range m.Songs {
		related = append(related, m.Songs[i])
	}
	return related
}
func (m Album) GetSources() []Source {
return m.Sources
}
@@ -183,6 +212,16 @@ type Artist struct {
Sources []Source
}
// Related lists the objects connected to the artist: the artist itself
// followed by its albums.
func (m Artist) Related() []MusicObject {
	related := make([]MusicObject, 0, 1+len(m.Albums))
	related = append(related, m)
	for i := range m.Albums {
		related = append(related, m.Albums[i])
	}
	return related
}
func (m Artist) GetSources() []Source {
return m.Sources
}

View File

@@ -19,7 +19,9 @@ const AlbumSource = ObjectType("album")
const ArtistSource = ObjectType("artist")
type Source struct {
Url string
Url string
AudioUrl string // will only be used when ObjectType = SongSource
SourceType *SourceType
ObjectType ObjectType
}
@@ -38,6 +40,9 @@ func dedupeSources(inputSources []Source) []Source {
if source.SourceType != nil {
deduped[mergeWithIndex].SourceType = source.SourceType
}
if source.AudioUrl != "" {
deduped[mergeWithIndex].AudioUrl = source.AudioUrl
}
} else {
// just appending

View File

@@ -2,6 +2,7 @@ package plugin
import (
"errors"
"fmt"
"regexp"
"gitea.elara.ws/Hazel/music-kraken/internal/common"
@@ -15,7 +16,7 @@ type Plugin interface {
RegexAlbum() *regexp.Regexp
RegexSong() *regexp.Regexp
Init()
Init(data.SourceType)
Search(query common.Query) ([]data.MusicObject, error)
@@ -44,7 +45,7 @@ func RegisterPlugin(plugin Plugin) error {
namePlugins[name] = plugin
plugin.Init()
plugin.Init(NameSourceType[name])
return nil
}
@@ -99,7 +100,7 @@ func compileSource(source data.Source) (data.Source, error) {
return source, errors.New("couldn't find corresponding object source on " + sourceType.Name + " for " + source.Url)
}
func Fetch(source data.Source) (data.MusicObject, error) {
func FetchSource(source data.Source) (data.MusicObject, error) {
// the fetch function without the post processing of the music objects
source, err := compileSource(source)
if err != nil {
@@ -147,39 +148,23 @@ func Fetch(source data.Source) (data.MusicObject, error) {
return nil, nil
}
func FetchList(source data.Source) ([]data.MusicObject, error) {
res := []data.MusicObject{}
func Fetch(musicObject data.MusicObject) (data.MusicObject, error) {
sources := musicObject.GetSources()
musicObject, err := Fetch(source)
if err != nil {
return res, err
if len(sources) <= 0 {
return musicObject, errors.New("didn't find a source for object")
}
if a, ok := musicObject.(data.Song); ok {
for _, ar := range a.Artists {
res = append(res, ar)
for _, source := range sources {
newMusicObject, err := FetchSource(source)
if err != nil {
return musicObject, err
}
if a.Album.Name != "" {
res = append(res, a.Album, a)
}
} else if a, ok := musicObject.(data.Album); ok {
for _, ar := range a.Artists {
res = append(res, ar)
}
res = append(res, a)
for _, s := range a.Songs {
res = append(res, s)
}
} else if a, ok := musicObject.(data.Artist); ok {
res = append(res, a)
for _, al := range a.Albums {
res = append(res, al)
}
} else {
res = append(res, musicObject)
musicObject = musicObject.Merge(newMusicObject)
}
return res, nil
return musicObject.Compile(), nil
}
type SearchConfig struct {
@@ -205,3 +190,93 @@ func Search(search string, config SearchConfig) ([]data.MusicObject, error) {
return res, nil
}
// downloadState is the context Download passes down its recursive calls:
// the artist, album and song the current download belongs to. A nil field
// means that level has not been determined yet.
type downloadState struct {
// artist the download is attributed to
artist *data.Artist
// album the download is attributed to
album *data.Album
// song that is being downloaded
song *data.Song
}
// variousArtist is the placeholder Download uses when an object lists no artist.
// NOTE(review): Compile is defined elsewhere; presumably it normalises the object - confirm.
var variousArtist = data.Artist{
Name: "VariousArtist",
}.Compile().(data.Artist)
// variousAlbum is the placeholder Download uses when a song's album has no name.
var variousAlbum = data.Album{
Name: "VariousAlbum",
}.Compile().(data.Album)
// downloadSong is currently a stub: it only prints the song name and always
// returns nil. state is not used yet.
func downloadSong(song data.Song, state downloadState) error {
fmt.Println("downloading: " + song.Name)
return nil
}
// Download fetches musicObject and downloads everything below it.
//
// A song is handed to downloadSong; an album recurses into its songs and an
// artist recurses into its albums. The optional first element of statesInput
// carries the artist/album context from the enclosing call; missing context
// is filled from the fetched object, falling back to variousArtist and
// variousAlbum. All errors encountered are collected and returned; the
// returned slice is empty (not nil) when a song downloads successfully.
func Download(musicObject data.MusicObject, statesInput ...downloadState) []error {
	var state downloadState
	if len(statesInput) > 0 {
		state = statesInput[0]
	}

	fetched, fetchErr := Fetch(musicObject)
	if fetchErr != nil {
		return []error{fetchErr}
	}

	switch obj := fetched.(type) {
	case data.Song:
		state.song = &obj

		if state.artist == nil {
			state.artist = &variousArtist
			if len(obj.Artists) > 0 {
				state.artist = &obj.Artists[0]
			}
		}

		if state.album == nil {
			state.album = &variousAlbum
			if obj.Album.Name != "" {
				state.album = &obj.Album
			}
		}

		if songErr := downloadSong(obj, state); songErr != nil {
			return []error{songErr}
		}
		return []error{}

	case data.Album:
		state.album = &obj

		if state.artist == nil {
			state.artist = &variousArtist
			if len(obj.Artists) > 0 {
				state.artist = &obj.Artists[0]
			}
		}

		collected := []error{}
		for _, track := range obj.Songs {
			collected = append(collected, Download(track, state)...)
		}
		return collected

	case data.Artist:
		state.artist = &obj

		collected := []error{}
		for _, release := range obj.Albums {
			collected = append(collected, Download(release, state)...)
		}
		return collected
	}

	return []error{
		errors.New("music object not recognized"),
	}
}

View File

@@ -3,6 +3,7 @@ package plugin
import (
"errors"
"fmt"
"net/url"
"regexp"
"strings"
@@ -23,7 +24,8 @@ func extractName(s string) string {
const musifyHost = "https://musify.club"
type Musify struct {
session *scraper.Session
session *scraper.Session
sourceType data.SourceType
}
func (m Musify) Name() string {
@@ -42,15 +44,16 @@ func (m Musify) RegexAlbum() *regexp.Regexp {
return regexp.MustCompile(`(?i)https?://musify\.club/release/[a-z\-0-9]+`)
}
func (m *Musify) Init() {
func (m *Musify) Init(sourceType data.SourceType) {
m.session = scraper.NewSession()
m.sourceType = sourceType
}
func (m Musify) RegexSong() *regexp.Regexp {
return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
}
func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
func (m Musify) parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
artist := data.Artist{}
var err error
@@ -62,6 +65,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + url,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
}
@@ -86,7 +90,7 @@ func parseArtistContact(contact *goquery.Selection) (data.Artist, error) {
return artist, err
}
func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
func (m Musify) parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
album := data.Album{}
var err error
@@ -114,7 +118,8 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
if url, urlExists := anchor.Attr("href"); urlExists {
album.Sources = append(album.Sources, data.Source{
Url: musifyHost + url,
ObjectType: data.ArtistSource,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
@@ -168,7 +173,7 @@ func parseAlbumContact(contact *goquery.Selection) (data.Album, error) {
return album, err
}
func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
func (m Musify) parseContactContainer(contactContainer *goquery.Selection) []data.MusicObject {
res := []data.MusicObject{}
contactContainer.Find("div.contacts__item").Each(func(i int, contact *goquery.Selection) {
@@ -179,11 +184,11 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
if exists {
if strings.Contains(url, "artist") {
if artist, err := parseArtistContact(contact); err == nil {
if artist, err := m.parseArtistContact(contact); err == nil {
res = append(res, artist)
}
} else if strings.Contains(url, "release") {
if album, err := parseAlbumContact(contact); err == nil {
if album, err := m.parseAlbumContact(contact); err == nil {
res = append(res, album)
}
}
@@ -194,7 +199,7 @@ func parseContactContainer(contactContainer *goquery.Selection) []data.MusicObje
return res
}
func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
func (m Musify) parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song := data.Song{}
var err error
@@ -212,7 +217,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song.Artists = append(song.Artists, data.Artist{
Name: strings.TrimSpace(artistAnchor.Text()),
Sources: []data.Source{
{Url: musifyHost + url, ObjectType: data.ArtistSource},
{Url: musifyHost + url, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
},
})
}
@@ -225,6 +230,7 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
song.Sources = append(song.Sources, data.Source{
Url: musifyHost + href,
ObjectType: data.SongSource,
SourceType: &m.sourceType,
})
}
@@ -236,11 +242,11 @@ func parsePlaylistItem(playlistItem *goquery.Selection) (data.Song, error) {
return song, err
}
func parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
func (m Musify) parsePlaylist(playlist *goquery.Selection) []data.MusicObject {
res := []data.MusicObject{}
playlist.Find("div.playlist__item").Each(func(i int, playlistItem *goquery.Selection) {
if song, err := parsePlaylistItem(playlistItem); err == nil {
if song, err := m.parsePlaylistItem(playlistItem); err == nil {
res = append(res, song)
}
})
@@ -264,16 +270,49 @@ func (m *Musify) Search(query common.Query) ([]data.MusicObject, error) {
}
doc.Find("div.contacts").Each(func(i int, contactContainer *goquery.Selection) {
musicObjects = append(musicObjects, parseContactContainer(contactContainer)...)
musicObjects = append(musicObjects, m.parseContactContainer(contactContainer)...)
})
doc.Find("div.playlist").Each(func(i int, playlist *goquery.Selection) {
musicObjects = append(musicObjects, parsePlaylist(playlist)...)
musicObjects = append(musicObjects, m.parsePlaylist(playlist)...)
})
return musicObjects, nil
}
// parsedSongUrl holds the pieces newParsedSongUrl extracts from a song url.
type parsedSongUrl struct {
// id is the part after the last "-" of the final path segment
id string
// name is the final path segment without the trailing "-<id>"
name string
// url is the raw url that was parsed
url string
}
// newParsedSongUrl splits the final path segment of rawUrl at its last "-"
// into a name (everything before) and an id (everything after, trimmed).
// It fails when rawUrl can't be parsed, when the segment contains no "-" or
// when the id is not numeric. The returned value always carries rawUrl, and
// on a non-numeric id it also carries the name and id that were found.
func newParsedSongUrl(rawUrl string) (parsedSongUrl, error) {
	res := parsedSongUrl{url: rawUrl}

	parsed, err := url.Parse(rawUrl)
	if err != nil {
		return res, err
	}

	// everything behind the last slash of the path
	segment := parsed.Path
	if slash := strings.LastIndex(segment, "/"); slash >= 0 {
		segment = segment[slash+1:]
	}

	dash := strings.LastIndex(segment, "-")
	if dash < 0 {
		return res, errors.New("last part of path has to consist of at least one - " + segment)
	}

	res.name = segment[:dash]
	res.id = strings.TrimSpace(segment[dash+1:])

	if common.IsNumeric(res.id) {
		return res, nil
	}
	return res, errors.New("last elem (id) has to be numeric " + res.id)
}
func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
song := data.Song{
Sources: []data.Source{
@@ -292,14 +331,19 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
}
// Download URL
/*
doc.Find("a[itemprop='audio']").Each(func(i int, anchor *goquery.Selection) {
href, exists := anchor.Attr("href")
if exists {
source.AudioURL = p.host + href
doc.Find("a[itemprop='audio']").Each(func(i int, anchor *goquery.Selection) {
if href, _ := anchor.Attr("href"); true {
// will be the source first added at the begining
song.Sources[0].AudioUrl = musifyHost + href
} else {
// http://musify.club/track/dl/7141298/crystal-f-sekundenschlaf.mp3
parsed, err := newParsedSongUrl(song.Sources[0].Url)
if err != nil {
return
}
})
*/
song.Sources[0].AudioUrl = "http://musify.club/track/dl/" + parsed.id + "/" + parsed.name + ".mp3"
}
})
// Song detail
var listElement *goquery.Selection
@@ -313,7 +357,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
song.Artists = append(song.Artists, data.Artist{
Name: strings.TrimSpace(artistAnchor.Text()),
Sources: []data.Source{
{Url: musifyHost + href},
{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType},
},
})
}
@@ -328,8 +372,6 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
return song, errors.New("too many breadcrumbs on page")
}
fmt.Println("found breadcrumbs")
if artistAnchor := listPoints.Eq(2).Find("a"); artistAnchor != nil && artistAnchor.Length() > 0 {
artist := data.Artist{}
useArtist := true
@@ -340,7 +382,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
useArtist = false
}
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href})
artist.Sources = append(artist.Sources, data.Source{Url: musifyHost + href, ObjectType: data.ArtistSource, SourceType: &m.sourceType})
} else {
useArtist = false
}
@@ -357,11 +399,11 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
}
if albumAnchor := listPoints.Eq(3).Find("a"); albumAnchor != nil && albumAnchor.Length() > 0 {
fmt.Println("found album")
if href, exists := albumAnchor.Attr("href"); exists {
song.Album.Sources = append(song.Album.Sources, data.Source{
Url: musifyHost + href,
Url: musifyHost + href,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
@@ -376,7 +418,7 @@ func (m *Musify) FetchSong(source data.Source) (data.Song, error) {
return song, nil
}
func parseSongCard(songCard *goquery.Selection) data.Song {
func (m Musify) parseSongCard(songCard *goquery.Selection) data.Song {
song := data.Song{
Artists: []data.Artist{},
Sources: []data.Source{},
@@ -405,7 +447,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
if anchorList.Length() > 1 {
trackAnchor := anchorList.Last()
if href, exists := trackAnchor.Attr("href"); exists {
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href})
song.Sources = append(song.Sources, data.Source{Url: musifyHost + href, ObjectType: data.SongSource, SourceType: &m.sourceType})
}
song.Name = strings.TrimSpace(trackAnchor.Text())
}
@@ -421,7 +463,9 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
if metaArtistSrc.Length() > 0 {
if content, exists := metaArtistSrc.Attr("content"); exists && content != "" {
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + content,
Url: musifyHost + content,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
}
}
@@ -459,7 +503,7 @@ func parseSongCard(songCard *goquery.Selection) data.Song {
return song
}
func parseAlbum(doc *goquery.Document) data.Album {
func (m Musify) parseAlbum(doc *goquery.Document) data.Album {
album := data.Album{
Artists: []data.Artist{},
Sources: []data.Source{},
@@ -481,12 +525,12 @@ func parseAlbum(doc *goquery.Document) data.Album {
if href, exists := artistAnchor.Attr("href"); exists {
hrefParts := strings.Split(href, "/")
if len(hrefParts) > 1 && hrefParts[len(hrefParts)-2] == "artist" {
artist := data.Artist{
Sources: []data.Source{},
}
artist := data.Artist{}
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + strings.TrimSpace(href),
Url: musifyHost + strings.TrimSpace(href),
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
// Artist name from span
@@ -509,7 +553,9 @@ func parseAlbum(doc *goquery.Document) data.Album {
if metaURL.Length() > 0 {
if content, exists := metaURL.Attr("content"); exists {
album.Sources = append(album.Sources, data.Source{
Url: musifyHost + content,
Url: musifyHost + content,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
}
@@ -535,7 +581,9 @@ func parseAlbum(doc *goquery.Document) data.Album {
if artistURLMeta.Length() > 0 {
if content, exists := artistURLMeta.Attr("content"); exists {
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + content,
Url: musifyHost + content,
ObjectType: data.ArtistSource,
SourceType: &m.sourceType,
})
}
}
@@ -589,7 +637,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
}
// Parse album metadata
parsedAlbum := parseAlbum(doc)
parsedAlbum := m.parseAlbum(doc)
album.Name = parsedAlbum.Name
album.Artists = parsedAlbum.Artists
album.Sources = append(album.Sources, parsedAlbum.Sources...)
@@ -598,7 +646,7 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
cardBody := doc.Find("div.card-body")
if cardBody.Length() > 0 {
cardBody.Find("div.playlist__item").Each(func(i int, songCard *goquery.Selection) {
song := parseSongCard(songCard)
song := m.parseSongCard(songCard)
album.Songs = append(album.Songs, song)
})
}
@@ -608,8 +656,288 @@ func (m Musify) FetchAlbum(source data.Source) (data.Album, error) {
return album, nil
}
func (m Musify) FetchArtist(source data.Source) (data.Artist, error) {
return data.Artist{
Name: extractName(source.Url),
}, nil
// parsedArtistUrl holds the pieces newParsedArtistUrl extracts from an artist url.
type parsedArtistUrl struct {
// id is the part after the last "-" of the final path segment
id string
// name is the final path segment without the trailing "-<id>"
name string
// url is the raw url that was parsed
url string
}
// newParsedArtistUrl splits the final path segment of rawUrl at its last "-"
// into a name (everything before) and an id (everything after).
// It fails when rawUrl can't be parsed, when the segment contains no "-" or
// when the id is not numeric. The returned value always carries rawUrl, and
// on a non-numeric id it also carries the name and id that were found.
func newParsedArtistUrl(rawUrl string) (parsedArtistUrl, error) {
	res := parsedArtistUrl{url: rawUrl}

	parsed, err := url.Parse(rawUrl)
	if err != nil {
		return res, err
	}

	// everything behind the last slash of the path
	segment := parsed.Path
	if slash := strings.LastIndex(segment, "/"); slash >= 0 {
		segment = segment[slash+1:]
	}

	dash := strings.LastIndex(segment, "-")
	if dash < 0 {
		return res, errors.New("last part of path has to consist of at least one - " + segment)
	}

	res.name = segment[:dash]
	res.id = segment[dash+1:]

	if common.IsNumeric(res.id) {
		return res, nil
	}
	return res, errors.New("last elem (id) has to be numeric " + res.id)
}
// fetchInitialArtist requests the artist page for parsed.name and copies what
// it finds into artist: the name (from the breadcrumbs, then overridden by the
// page's h1 title when present) and additional sources (the "песни" nav tab
// link and every a.link inside ul.icon-list).
//
// It returns the artist as filled so far together with an error when the
// request fails, the response can't be parsed as html, or the breadcrumb list
// does not have exactly three items.
func (m *Musify) fetchInitialArtist(parsed parsedArtistUrl, artist data.Artist) (data.Artist, error) {
// NOTE(review): only parsed.name (without the id) is put into the path, and the
// "#bodyContent" part is unescaped, so it reads as a url fragment - confirm both are intended.
endpoint := fmt.Sprintf("https://musify.club/artist/%s?_pjax=#bodyContent", parsed.name)
resp, err := m.session.Get(endpoint)
if err != nil {
return artist, err
}
doc, err := scraper.GetHtml(resp)
if err != nil {
return artist, err
}
// Breadcrumbs
breadcrumbs := doc.Find("ol.breadcrumb")
if breadcrumbs.Length() > 0 {
breadcrumbList := breadcrumbs.Find("li.breadcrumb-item")
if breadcrumbList.Length() == 3 {
// the third breadcrumb holds the artist name
artist.Name = strings.TrimSpace(breadcrumbList.Eq(2).Text())
} else {
return artist, errors.New("breadcrumb layout on artist page changed")
}
}
// Nav tabs for songs link
navTabs := doc.Find("ul.nav-tabs")
if navTabs.Length() > 0 {
navTabs.Find("li.nav-item").Each(func(i int, listItem *goquery.Selection) {
if strings.HasPrefix(strings.TrimSpace(listItem.Text()), "песни") {
// "песни" translates to "songs"
anchor := listItem.Find("a")
if href, exists := anchor.Attr("href"); exists {
// only Url is set here; ObjectType and SourceType stay at their zero values
artist.Sources = append(artist.Sources, data.Source{
Url: musifyHost + href,
})
}
}
})
}
// Content title
contentTitle := doc.Find("header.content__title")
if contentTitle.Length() > 0 {
h1Name := contentTitle.Find("h1")
if h1Name.Length() > 0 {
// takes precedence over the name read from the breadcrumbs above
artist.Name = strings.TrimSpace(h1Name.Text())
}
}
// Country and additional sources from icon list
iconList := doc.Find("ul.icon-list")
if iconList.Length() > 0 {
// Country flag - simplified version
countryFlag := iconList.Find("i.flag-icon")
if countryFlag.Length() > 0 {
// Extract country code from class names
// (the loop below currently has no effect: the assignment is commented out)
classes, _ := countryFlag.Attr("class")
classList := strings.Fields(classes)
for _, class := range classList {
if class != "flag-icon" && class != "shadow" && len(class) == 2 {
// This would be where you'd use a country lookup library
// artist.Country = getCountryFromCode(class)
break
}
}
}
// Additional sources
iconList.Find("a.link").Each(func(i int, additionalSource *goquery.Selection) {
if href, exists := additionalSource.Attr("href"); exists {
// Simplified source matching - you'd implement your Source.matchUrl equivalent
// the href is stored verbatim (no musifyHost prefix, no ObjectType/SourceType)
artist.Sources = append(artist.Sources, data.Source{Url: href})
}
})
}
/*
// Notes
noteSection := doc.Find("#text-main")
if noteSection.Length() > 0 {
html, _ := noteSection.Html()
// artist.Notes = FormattedText{HTML: html} - if you have this field
}
// Artist artwork
doc.Find("img.artist-img").Each(func(i int, img *goquery.Selection) {
src, exists := img.Attr("data-src")
if !exists {
src, _ = img.Attr("src")
}
if src != "" {
// artist.Artwork = append(artist.Artwork, Artwork{Url: src})
}
})
*/
return artist, nil
}
// parseAlbumCard builds an album from one discography card.
//
// The first anchor of the card contributes an album source; the anchor inside
// div.card-body (when present) contributes another source and the album name,
// which is passed through common.CleanSongTitle together with artistName.
// An error is returned, along with the empty album, when the card contains no
// anchor at all.
func (m Musify) parseAlbumCard(albumCard *goquery.Selection, artistName string) (data.Album, error) {
album := data.Album{
Sources: []data.Source{},
Artists: []data.Artist{},
Songs: []data.Song{},
}
/*
// Album type from data attribute
if albumTypeID, exists := albumCard.Attr("data-type"); exists {
if parsedType, err := strconv.Atoi(albumTypeID); err == nil {
// album.Type = getAlbumTypeFromID(parsedType) - implement your mapping
if parsedType == 5 {
// album.Status = AlbumStatusBootleg
}
}
}
*/
// Parse release anchor
// appends a source for the anchor's href and, when textIsName is set,
// takes the anchor text as the album name
parseReleaseAnchor := func(anchor *goquery.Selection, textIsName bool) {
if anchor == nil {
return
}
if href, exists := anchor.Attr("href"); exists {
album.Sources = append(album.Sources, data.Source{
Url: musifyHost + href,
ObjectType: data.AlbumSource,
SourceType: &m.sourceType,
})
}
if textIsName {
album.Name = common.CleanSongTitle(strings.TrimSpace(anchor.Text()), artistName)
}
}
// Main thumbnail anchor
anchorList := albumCard.Find("a")
if anchorList.Length() > 0 {
mainAnchor := anchorList.First()
parseReleaseAnchor(mainAnchor, false)
/*
// Thumbnail image
thumbnail := mainAnchor.Find("img")
if thumbnail.Length() > 0 {
if alt, exists := thumbnail.Attr("alt"); exists {
album.Name = common.CleanSongTitle(alt, artistName)
}
// Image URL could be stored if needed
// if src, exists := thumbnail.Attr("src"); exists { ... }
}
*/
} else {
return album, errors.New("the card has no thumbnail or url")
}
// Card body
cardBody := albumCard.Find("div.card-body")
if cardBody.Length() > 0 {
// NOTE(review): this appends a second source; if it has the same href as the
// thumbnail anchor the album ends up with a duplicate - confirm it is deduped later.
parseReleaseAnchor(cardBody.Find("a"), true)
}
/*
// Parse date from card footer
parseSmallDate := func(small *goquery.Selection) {
italic := small.Find("i")
if italic.Length() == 0 {
return
}
if title, exists := italic.Attr("title"); exists && title == "Добавлено" {
rawTime := strings.TrimSpace(small.Text())
// Parse date from "13.11.2021" format
// album.Date = parseDate(rawTime, "02.01.2006")
}
}
// Card footers
cardFooters := albumCard.Find("div.card-footer")
if cardFooters.Length() == 3 {
lastFooter := cardFooters.Last()
lastFooter.Find("small").Each(func(i int, small *goquery.Selection) {
parseSmallDate(small)
})
} else {
m.logger.Debug(fmt.Sprintf("expected 3 card footers, got %d", cardFooters.Length()))
}
*/
return album, nil
}
// fetchArtistDiscography posts the artist id to the discography filter
// endpoint and turns every "div.card" of the returned HTML into a data.Album.
//
// url supplies the id sent as "ArtistID"; artistName is forwarded to
// parseAlbumCard. albumTypeBlacklist is accepted but not evaluated yet.
//
// Cards that cannot be parsed are reported on stdout and left out of the
// result. Request and HTML errors are returned wrapped with context, together
// with the (empty) album list.
func (m *Musify) fetchArtistDiscography(url parsedArtistUrl, artistName string, albumTypeBlacklist []string) ([]data.Album, error) {
	albumList := []data.Album{}
	const endpoint = "https://musify.club/discography/filteralbums"

	// POST request with form data
	formData := map[string]string{
		"ArtistID":              url.id,
		"SortOrder.Property":    "dateCreated",
		"SortOrder.IsAscending": "false",
		"X-Requested-With":      "XMLHttpRequest",
	}

	resp, err := m.session.PostForm(endpoint, formData, map[string]string{"X-Requested-With": "XMLHttpRequest"})
	if err != nil {
		return albumList, fmt.Errorf("requesting discography of artist %q: %w", url.id, err)
	}

	doc, err := scraper.GetHtml(resp)
	if err != nil {
		return albumList, fmt.Errorf("reading discography of artist %q: %w", url.id, err)
	}

	doc.Find("div.card").Each(func(i int, card *goquery.Selection) {
		album, err := m.parseAlbumCard(card, artistName)
		if err != nil {
			// A broken card must not end up in the result as a half-filled
			// album; report it and carry on with the remaining cards.
			fmt.Println(err)
			return
		}
		albumList = append(albumList, album)
	})

	return albumList, nil
}
// FetchArtist resolves an artist source into a data.Artist.
//
// The artist starts out with the name extracted from source.Url, is then
// passed through fetchInitialArtist, and finally gets the albums returned by
// fetchArtistDiscography appended. When any step fails, the artist as built up
// to that point is returned together with the error.
func (m *Musify) FetchArtist(source data.Source) (data.Artist, error) {
	artist := data.Artist{Name: extractName(source.Url)}

	artistURL, err := newParsedArtistUrl(source.Url)
	if err != nil {
		return artist, err
	}

	if artist, err = m.fetchInitialArtist(artistURL, artist); err != nil {
		return artist, err
	}

	discography, err := m.fetchArtistDiscography(artistURL, artist.Name, []string{})
	if err != nil {
		return artist, err
	}
	artist.Albums = append(artist.Albums, discography...)

	return artist, nil
}

View File

@@ -40,7 +40,7 @@ func (m MusifyTest) RegexSong() *regexp.Regexp {
return regexp.MustCompile(`(?i)https?://musify\.club/track/[a-z\-0-9]+`)
}
func (m *MusifyTest) Init() {
func (m *MusifyTest) Init(sourceType data.SourceType) {
}
@@ -87,7 +87,7 @@ func TestRegister(t *testing.T) {
func TestFetchSong(t *testing.T) {
RegisterPlugin(&MusifyTest{})
s, err := Fetch(data.Source{
s, err := FetchSource(data.Source{
Url: "https://musify.club/track/linkin-park-in-the-end-3058",
})
@@ -108,7 +108,7 @@ func TestFetchSong(t *testing.T) {
func TestFetchAlbum(t *testing.T) {
RegisterPlugin(&MusifyTest{})
a, err := Fetch(data.Source{
a, err := FetchSource(data.Source{
Url: "https://musify.club/release/linkin-park-hybrid-theory-2000-188",
})
@@ -129,7 +129,7 @@ func TestFetchAlbum(t *testing.T) {
func TestFetchArtist(t *testing.T) {
RegisterPlugin(&MusifyTest{})
a, err := Fetch(data.Source{
a, err := FetchSource(data.Source{
Url: "https://musify.club/artist/linkin-park-5",
})
@@ -150,7 +150,7 @@ func TestFetchArtist(t *testing.T) {
func TestFetchWrongUrl(t *testing.T) {
RegisterPlugin(&MusifyTest{})
_, err := Fetch(data.Source{
_, err := FetchSource(data.Source{
Url: "https://musify.club/",
})
@@ -162,7 +162,7 @@ func TestFetchWrongUrl(t *testing.T) {
func TestNonExistentSourceType(t *testing.T) {
RegisterPlugin(&MusifyTest{})
_, err := Fetch(data.Source{
_, err := FetchSource(data.Source{
Url: "https://musify.club/",
SourceType: &data.SourceType{
Name: "doesn't exist",

View File

@@ -8,6 +8,7 @@ import (
"net/http"
"net/http/cookiejar"
"net/url"
"strings"
"time"
"golang.org/x/net/publicsuffix"
@@ -118,6 +119,32 @@ func (s *Session) PostMultipartForm(url string, data map[string]string, headers
return s.client.Do(req)
}
// PostForm performs a POST request with URL-encoded form data.
//
// rawUrl is resolved through s.buildURL and data is sent as the
// "application/x-www-form-urlencoded" request body. Headers are applied in
// this order: the session defaults, the form Content-Type, then every map in
// headers in the order given, so a later value overrides an earlier one for
// the same key.
func (s *Session) PostForm(rawUrl string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
	// Prepare form data
	formData := make(url.Values, len(data))
	for k, v := range data {
		formData.Set(k, v)
	}

	req, err := http.NewRequest(http.MethodPost, s.buildURL(rawUrl), strings.NewReader(formData.Encode()))
	if err != nil {
		return nil, err
	}

	s.setDefaultHeaders(req)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

	// Apply all additional header maps, not only the first one, so that no
	// argument of the variadic parameter is silently dropped.
	for _, extra := range headers {
		for key, value := range extra {
			req.Header.Set(key, value)
		}
	}

	return s.client.Do(req)
}
// PostJSON performs a POST request with JSON data
func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
fullURL := s.buildURL(url)

View File

@@ -5,5 +5,8 @@ import (
)
func main() {
cli.Shell()
cli.Shell([]string{
"#a Crystal F",
"d: 0",
})
}