From f5c8ba2282cc048bfc59c64aa97cc2a5d655bb71 Mon Sep 17 00:00:00 2001
From: Elara Musayelyan
Date: Sun, 25 Jun 2023 13:07:38 -0700
Subject: [PATCH] Add html builtin module

---
Review notes (this section is ignored when the patch is applied):
  * html.parse returns an error for unsupported argument types; previously
    the reader stayed nil and the deferred Close panicked.
  * html.parse, selection.attr and selection.next now report their own
    names in error messages and tracebacks.
  * selection.html() no longer returns a value alongside an error, and
    slicing a selection honours the step.
  * The blob id on the index line of html.go predates these edits.

 .gitignore                    |   3 +-
 go.mod                        |   2 +
 go.sum                        |  10 +-
 internal/builtins/html.go     | 285 ++++++++++++++++++++++++++++++++++
 internal/builtins/reader.go   |  13 ++
 internal/builtins/register.go |   1 +
 6 files changed, 311 insertions(+), 3 deletions(-)
 create mode 100644 internal/builtins/html.go

diff --git a/.gitignore b/.gitignore
index 6b84323..a627943 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 /lure-updater.toml
 /lure-updater
-/dist/
\ No newline at end of file
+/dist/
+*.star
\ No newline at end of file
diff --git a/go.mod b/go.mod
index b6ecb81..65e382b 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module go.elara.ws/lure-updater
 go 1.20
 
 require (
+	github.com/PuerkitoBio/goquery v1.8.1
 	github.com/caarlos0/env/v8 v8.0.0
 	github.com/go-git/go-git/v5 v5.7.0
 	github.com/pelletier/go-toml/v2 v2.0.8
@@ -21,6 +22,7 @@ require (
 	github.com/Microsoft/go-winio v0.5.2 // indirect
 	github.com/ProtonMail/go-crypto v0.0.0-20230518184743-7afd39499903 // indirect
 	github.com/acomagu/bufpipe v1.0.4 // indirect
+	github.com/andybalholm/cascadia v1.3.1 // indirect
 	github.com/cloudflare/circl v1.3.3 // indirect
 	github.com/emirpasic/gods v1.18.1 // indirect
 	github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
diff --git a/go.sum b/go.sum
index 925bddd..cf90622 100644
--- a/go.sum
+++ b/go.sum
@@ -4,8 +4,12 @@ github.com/Microsoft/go-winio v0.5.2 h1:a9IhgEQBCUEk6QCdml9CiJGhAws+YwffDHEMp1VM
 github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY=
 github.com/ProtonMail/go-crypto v0.0.0-20230518184743-7afd39499903 h1:ZK3C5DtzV2nVAQTx5S5jQvMeDqWtD1By5mOoyY/xJek=
 github.com/ProtonMail/go-crypto v0.0.0-20230518184743-7afd39499903/go.mod h1:8TI4H3IbrackdNgv+92dI+rhpCaLqM0IfpgCgenFvRE=
+github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
+github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
 github.com/acomagu/bufpipe v1.0.4 h1:e3H4WUzM3npvo5uv95QuJM3cQspFNtFBzvJ2oNjKIDQ=
 github.com/acomagu/bufpipe v1.0.4/go.mod h1:mxdxdup/WdsKVreO5GpW4+M/1CE2sMG4jeGJ2sYmHc4=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
 github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
 github.com/bwesterb/go-ristretto v1.2.0/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0=
@@ -124,8 +128,8 @@ go.elara.ws/logger v0.0.0-20230421022458-e80700db2090 h1:RVC8XvWo6Yw4HUshqx4TSzu
 go.elara.ws/logger v0.0.0-20230421022458-e80700db2090/go.mod h1:qng49owViqsW5Aey93lwBXONw20oGbJIoLVscB16mPM=
 go.elara.ws/pcre v0.0.0-20230421030233-daf2d2e6973f h1:ZwR0xvBeP5BHHv63fgfuwhZIj+Si5rp79WSDUE73ZVA=
 go.elara.ws/pcre v0.0.0-20230421030233-daf2d2e6973f/go.mod h1:EF48C6VnP4wBayzFGk6lXqbiLucH7EfiaYOgiiCe5k4=
-go.elara.ws/vercmp v0.0.0-20230622213404-1ef50f776de8 h1:Q4fiNpQH1qOlPUVt/cVoW3VtIvOe+jo7mFTX0Sor7O4=
-go.elara.ws/vercmp v0.0.0-20230622213404-1ef50f776de8/go.mod h1:/7PNW7nFnDR5W7UXZVc04gdVLR/wBNgkm33KgIz0OBk=
+go.elara.ws/vercmp v0.0.0-20230622214216-0b2b067575c4 h1:Ep54XceQlKhcCHl9awG+wWP4kz4kIP3c3Lzw/Gc/zwY=
+go.elara.ws/vercmp v0.0.0-20230622214216-0b2b067575c4/go.mod h1:/7PNW7nFnDR5W7UXZVc04gdVLR/wBNgkm33KgIz0OBk=
 go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ=
 go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw=
 go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY=
@@ -155,9 +159,11 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
diff --git a/internal/builtins/html.go b/internal/builtins/html.go
new file mode 100644
index 0000000..efcfaea
--- /dev/null
+++ b/internal/builtins/html.go
@@ -0,0 +1,285 @@
+package builtins
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+	"go.starlark.net/starlark"
+	"go.starlark.net/starlarkstruct"
+)
+
+// Selections are handed to Starlark by value, so the interfaces are
+// asserted against the value type rather than a pointer to it.
+var (
+	_ starlark.Iterable  = starlarkSelection{}
+	_ starlark.Sliceable = starlarkSelection{}
+	_ starlark.Sequence  = starlarkSelection{}
+	_ starlark.Value     = starlarkSelection{}
+)
+
+// htmlModule is the html module made available to Starlark scripts.
+var htmlModule = &starlarkstruct.Module{
+	Name: "html",
+	Members: starlark.StringDict{
+		"parse": starlark.NewBuiltin("html.parse", htmlParse),
+	},
+}
+
+// htmlParse implements html.parse(from). It parses an HTML document from a
+// string, bytes, or reader value and returns a selection holding the parsed
+// document. The input reader is closed before htmlParse returns.
+func htmlParse(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var val starlark.Value
+	err := starlark.UnpackArgs("html.parse", args, kwargs, "from", &val)
+	if err != nil {
+		return nil, err
+	}
+
+	var r io.ReadCloser
+	switch val := val.(type) {
+	case starlark.String:
+		r = io.NopCloser(strings.NewReader(string(val)))
+	case starlark.Bytes:
+		r = io.NopCloser(strings.NewReader(string(val)))
+	case starlarkReader:
+		r = val
+	default:
+		// Without this case r would stay nil and the deferred Close would panic.
+		return nil, fmt.Errorf("html.parse: for parameter from: got %s, want string, bytes, or reader", val.Type())
+	}
+	defer r.Close()
+
+	doc, err := goquery.NewDocumentFromReader(r)
+	if err != nil {
+		return nil, err
+	}
+
+	return newStarlarkSelection(doc.Selection), nil
+}
+
+// starlarkSelection exposes a goquery selection to Starlark. The embedded
+// struct holds the builtins that scripts call as methods.
+type starlarkSelection struct {
+	sel *goquery.Selection
+	*starlarkstruct.Struct
+}
+
+// newStarlarkSelection wraps sel and binds the script-facing builtins to it.
+func newStarlarkSelection(sel *goquery.Selection) starlarkSelection {
+	ss := starlarkSelection{sel: sel}
+	ss.Struct = starlarkstruct.FromStringDict(starlark.String("html.selection"), starlark.StringDict{
+		"text":           starlark.NewBuiltin("html.selection.text", ss.text),
+		"html":           starlark.NewBuiltin("html.selection.html", ss.html),
+		"children":       starlark.NewBuiltin("html.selection.children", ss.children),
+		"parent":         starlark.NewBuiltin("html.selection.parent", ss.parent),
+		"find":           starlark.NewBuiltin("html.selection.find", ss.find),
+		"add":            starlark.NewBuiltin("html.selection.add", ss.add),
+		"attr":           starlark.NewBuiltin("html.selection.attr", ss.attr),
+		"has_class":      starlark.NewBuiltin("html.selection.has_class", ss.hasClass),
+		"index_selector": starlark.NewBuiltin("html.selection.index_selector", ss.indexSelector),
+		"and_self":       starlark.NewBuiltin("html.selection.and_self", ss.andSelf),
+		"first":          starlark.NewBuiltin("html.selection.first", ss.first),
+		"last":           starlark.NewBuiltin("html.selection.last", ss.last),
+		"next":           starlark.NewBuiltin("html.selection.next", ss.next),
+		"next_all":       starlark.NewBuiltin("html.selection.next_all", ss.nextAll),
+		"next_until":     starlark.NewBuiltin("html.selection.next_until", ss.nextUntil),
+		"prev":           starlark.NewBuiltin("html.selection.prev", ss.prev),
+		"prev_all":       starlark.NewBuiltin("html.selection.prev_all", ss.prevAll),
+		"prev_until":     starlark.NewBuiltin("html.selection.prev_until", ss.prevUntil),
+	})
+	return ss
+}
+
+// Truth reports whether the selection contains at least one node.
+func (ss starlarkSelection) Truth() starlark.Bool {
+	return len(ss.sel.Nodes) > 0
+}
+
+// Len returns the number of nodes in the selection.
+func (ss starlarkSelection) Len() int {
+	return ss.sel.Length()
+}
+
+// Index returns a selection containing only the node at index i.
+func (ss starlarkSelection) Index(i int) starlark.Value {
+	return newStarlarkSelection(ss.sel.Slice(i, i+1))
+}
+
+// Slice returns the nodes picked by [start:end:step]. Per the contract of
+// starlark.Sliceable, the caller supplies valid bounds and a non-zero step.
+func (ss starlarkSelection) Slice(start, end, step int) starlark.Value {
+	if step == 1 {
+		return newStarlarkSelection(ss.sel.Slice(start, end))
+	}
+
+	// goquery only slices contiguous ranges, so strided and reversed
+	// slices are assembled node by node.
+	nodes := ss.sel.Nodes[:0:0]
+	for i := start; (step > 0 && i < end) || (step < 0 && i > end); i += step {
+		nodes = append(nodes, ss.sel.Nodes[i])
+	}
+	return newStarlarkSelection(ss.sel.Slice(0, 0).AddNodes(nodes...))
+}
+
+// Iterate returns an iterator that yields each node as its own selection.
+func (ss starlarkSelection) Iterate() starlark.Iterator {
+	return newSelectionIterator(ss.sel)
+}
+
+// text implements selection.text() using goquery's Text.
+func (ss starlarkSelection) text(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return starlark.String(ss.sel.Text()), nil
+}
+
+// html implements selection.html() using goquery's Html.
+func (ss starlarkSelection) html(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	s, err := ss.sel.Html()
+	if err != nil {
+		return nil, err
+	}
+	return starlark.String(s), nil
+}
+
+// children implements selection.children() using goquery's Children.
+func (ss starlarkSelection) children(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.Children()), nil
+}
+
+// parent implements selection.parent() using goquery's Parent.
+func (ss starlarkSelection) parent(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.Parent()), nil
+}
+
+// find implements selection.find(selector) using goquery's Find.
+func (ss starlarkSelection) find(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var selector string
+	err := starlark.UnpackArgs("html.selection.find", args, kwargs, "selector", &selector)
+	if err != nil {
+		return nil, err
+	}
+
+	return newStarlarkSelection(ss.sel.Find(selector)), nil
+}
+
+// add implements selection.add(selector) using goquery's Add.
+func (ss starlarkSelection) add(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var selector string
+	err := starlark.UnpackArgs("html.selection.add", args, kwargs, "selector", &selector)
+	if err != nil {
+		return nil, err
+	}
+	return newStarlarkSelection(ss.sel.Add(selector)), nil
+}
+
+// indexSelector implements selection.index_selector(selector) and returns an int.
+func (ss starlarkSelection) indexSelector(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var selector string
+	err := starlark.UnpackArgs("html.selection.index_selector", args, kwargs, "selector", &selector)
+	if err != nil {
+		return nil, err
+	}
+	return starlark.MakeInt(ss.sel.IndexSelector(selector)), nil
+}
+
+// attr implements selection.attr(name, default?). It returns the named
+// attribute, or default (the empty string when omitted) if it is not set.
+func (ss starlarkSelection) attr(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var name, def string
+	err := starlark.UnpackArgs("html.selection.attr", args, kwargs, "name", &name, "default??", &def)
+	if err != nil {
+		return nil, err
+	}
+	return starlark.String(ss.sel.AttrOr(name, def)), nil
+}
+
+// hasClass implements selection.has_class(name) and returns a bool.
+func (ss starlarkSelection) hasClass(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var name string
+	err := starlark.UnpackArgs("html.selection.has_class", args, kwargs, "name", &name)
+	if err != nil {
+		return nil, err
+	}
+	return starlark.Bool(ss.sel.HasClass(name)), nil
+}
+
+// andSelf implements selection.and_self() using goquery's AndSelf.
+func (ss starlarkSelection) andSelf(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.AndSelf()), nil
+}
+
+// first implements selection.first() using goquery's First.
+func (ss starlarkSelection) first(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.First()), nil
+}
+
+// last implements selection.last() using goquery's Last.
+func (ss starlarkSelection) last(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.Last()), nil
+}
+
+// next implements selection.next() using goquery's Next.
+func (ss starlarkSelection) next(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.Next()), nil
+}
+
+// nextAll implements selection.next_all() using goquery's NextAll.
+func (ss starlarkSelection) nextAll(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.NextAll()), nil
+}
+
+// nextUntil implements selection.next_until(selector) using goquery's NextUntil.
+func (ss starlarkSelection) nextUntil(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var selector string
+	err := starlark.UnpackArgs("html.selection.next_until", args, kwargs, "selector", &selector)
+	if err != nil {
+		return nil, err
+	}
+	return newStarlarkSelection(ss.sel.NextUntil(selector)), nil
+}
+
+// prev implements selection.prev() using goquery's Prev.
+func (ss starlarkSelection) prev(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.Prev()), nil
+}
+
+// prevAll implements selection.prev_all() using goquery's PrevAll.
+func (ss starlarkSelection) prevAll(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	return newStarlarkSelection(ss.sel.PrevAll()), nil
+}
+
+// prevUntil implements selection.prev_until(selector) using goquery's PrevUntil.
+func (ss starlarkSelection) prevUntil(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var selector string
+	err := starlark.UnpackArgs("html.selection.prev_until", args, kwargs, "selector", &selector)
+	if err != nil {
+		return nil, err
+	}
+	return newStarlarkSelection(ss.sel.PrevUntil(selector)), nil
+}
+
+// starlarkSelectionIterator walks the nodes of a selection in order.
+type starlarkSelectionIterator struct {
+	sel   *goquery.Selection
+	index int
+}
+
+// newSelectionIterator returns an iterator positioned at the first node of sel.
+func newSelectionIterator(sel *goquery.Selection) *starlarkSelectionIterator {
+	return &starlarkSelectionIterator{sel: sel}
+}
+
+// Next stores the next node in v, wrapped in its own selection, and reports
+// whether a node was available.
+func (ssi *starlarkSelectionIterator) Next(v *starlark.Value) bool {
+	if ssi.index >= ssi.sel.Length() {
+		return false
+	}
+	*v = newStarlarkSelection(ssi.sel.Slice(ssi.index, ssi.index+1))
+	ssi.index++
+	return true
+}
+
+// Done implements starlark.Iterator; there is nothing to release.
+func (ssi *starlarkSelectionIterator) Done() {}
diff --git a/internal/builtins/reader.go b/internal/builtins/reader.go
index ee05757..e07ccf5 100644
--- a/internal/builtins/reader.go
+++ b/internal/builtins/reader.go
@@ -200,3 +200,16 @@ func (sr starlarkReader) closeReader(thread *starlark.Thread, b *starlark.Builti
 	}
 	return starlark.None, nil
 }
+
+// Read implements the io.ReadCloser interface
+func (sr starlarkReader) Read(b []byte) (int, error) {
+	return sr.br.Read(b)
+}
+
+// Close implements the io.ReadCloser interface
+func (sr starlarkReader) Close() error {
+	if sr.closeFunc != nil {
+		return sr.closeFunc()
+	}
+	return nil
+}
diff --git a/internal/builtins/register.go b/internal/builtins/register.go
index 11bca43..5cda420 100644
--- a/internal/builtins/register.go
+++ b/internal/builtins/register.go
@@ -44,5 +44,6 @@ func Register(sd starlark.StringDict, opts *Options) {
 	sd["log"] = logModule(opts.Name)
 	sd["json"] = starlarkjson.Module
 	sd["utils"] = utilsModule
+	sd["html"] = htmlModule
 	sd["register_webhook"] = registerWebhook(opts.Mux, opts.Config, opts.Name)
 }