2022-05-18 23:28:40 +00:00
|
|
|
// Package pcre is a library that provides pcre2 regular expressions
|
|
|
|
// in pure Go, allowing for features such as cross-compiling.
|
|
|
|
//
|
|
|
|
// The lib directory contains source code automatically translated from
|
|
|
|
// pcre2's C source code for each supported architecture and/or OS.
|
|
|
|
// This package wraps the automatically-translated source to provide a
|
|
|
|
// safe interface as close to Go's regexp library as possible.
|
|
|
|
package pcre
|
|
|
|
|
|
|
|
import (
|
2023-07-26 05:35:29 +00:00
|
|
|
"math"
|
2022-05-18 23:28:40 +00:00
|
|
|
"os"
|
|
|
|
"runtime"
|
|
|
|
"strconv"
|
|
|
|
"sync"
|
|
|
|
"unsafe"
|
|
|
|
|
2023-04-21 03:02:33 +00:00
|
|
|
"go.elara.ws/pcre/lib"
|
2022-05-18 23:28:40 +00:00
|
|
|
|
|
|
|
"modernc.org/libc"
|
|
|
|
)
|
|
|
|
|
2023-07-26 05:35:29 +00:00
|
|
|
// Unset is the sentinel offset that FindSubmatch and FindAllSubmatch test
// for: a capture group whose start offset equals Unset is returned as nil.
// NOTE(review): presumably mirrors pcre2's PCRE2_UNSET for groups that did
// not take part in the match - confirm.
const Unset = math.MaxUint
|
|
|
|
|
2022-05-19 00:51:31 +00:00
|
|
|
// Version returns the version of pcre2 embedded in this library.
|
|
|
|
func Version() string { return lib.DPACKAGE_VERSION }
|
|
|
|
|
2022-05-18 23:28:40 +00:00
|
|
|
// Regexp represents a pcre2 regular expression
|
|
|
|
type Regexp struct {
|
|
|
|
mtx *sync.Mutex
|
|
|
|
expr string
|
|
|
|
re uintptr
|
2023-07-17 06:23:47 +00:00
|
|
|
mctx uintptr
|
2022-05-18 23:28:40 +00:00
|
|
|
tls *libc.TLS
|
2023-07-17 06:23:47 +00:00
|
|
|
|
|
|
|
calloutMtx *sync.Mutex
|
|
|
|
callout *func(tls *libc.TLS, cbptr, data uintptr) int32
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Compile runs CompileOpts with no options.
|
|
|
|
//
|
|
|
|
// Close() should be called on the returned expression
|
|
|
|
// once it is no longer needed.
|
|
|
|
func Compile(pattern string) (*Regexp, error) {
|
|
|
|
return CompileOpts(pattern, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
// CompileOpts compiles the provided pattern using the given options.
|
|
|
|
//
|
|
|
|
// Close() should be called on the returned expression
|
|
|
|
// once it is no longer needed.
|
|
|
|
func CompileOpts(pattern string, options CompileOption) (*Regexp, error) {
|
|
|
|
tls := libc.NewTLS()
|
|
|
|
|
|
|
|
// Get C string of pattern
|
|
|
|
cPattern, err := libc.CString(pattern)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
// Free the string when done
|
|
|
|
defer libc.Xfree(tls, cPattern)
|
|
|
|
|
|
|
|
// Allocate new error
|
|
|
|
cErr := allocError(tls)
|
|
|
|
// Free error when done
|
|
|
|
defer libc.Xfree(tls, cErr)
|
|
|
|
|
|
|
|
// Get error offsets
|
|
|
|
errPtr := addErrCodeOffset(cErr)
|
|
|
|
errOffsetPtr := addErrOffsetOffset(cErr)
|
|
|
|
|
|
|
|
// Convert pattern length to size_t type
|
|
|
|
cPatLen := lib.Tsize_t(len(pattern))
|
|
|
|
|
|
|
|
// Compile expression
|
|
|
|
r := lib.Xpcre2_compile_8(tls, cPattern, cPatLen, uint32(options), errPtr, errOffsetPtr, 0)
|
|
|
|
if r == 0 {
|
|
|
|
return nil, ptrToError(tls, cErr)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create regexp instance
|
|
|
|
regex := Regexp{
|
2023-07-17 06:23:47 +00:00
|
|
|
expr: pattern,
|
|
|
|
mtx: &sync.Mutex{},
|
|
|
|
re: r,
|
|
|
|
mctx: lib.Xpcre2_match_context_create_8(tls, 0),
|
|
|
|
tls: tls,
|
|
|
|
calloutMtx: &sync.Mutex{},
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure resources are freed if GC collects the
|
|
|
|
// regular expression.
|
|
|
|
runtime.SetFinalizer(®ex, func(r *Regexp) error {
|
|
|
|
return r.Close()
|
|
|
|
})
|
|
|
|
|
|
|
|
return ®ex, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// MustCompile compiles the given pattern and panics
|
|
|
|
// if there was an error
|
|
|
|
//
|
|
|
|
// Close() should be called on the returned expression
|
|
|
|
// once it is no longer needed.
|
|
|
|
func MustCompile(pattern string) *Regexp {
|
|
|
|
rgx, err := Compile(pattern)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return rgx
|
|
|
|
}
|
|
|
|
|
|
|
|
// MustCompileOpts compiles the given pattern with the given
|
|
|
|
// options and panics if there was an error.
|
|
|
|
//
|
|
|
|
// Close() should be called on the returned expression
|
|
|
|
// once it is no longer needed.
|
|
|
|
func MustCompileOpts(pattern string, options CompileOption) *Regexp {
|
|
|
|
rgx, err := CompileOpts(pattern, options)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return rgx
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find returns the leftmost match of the regular expression.
|
|
|
|
// A return value of nil indicates no match.
|
|
|
|
func (r *Regexp) Find(b []byte) []byte {
|
|
|
|
matches, err := r.match(b, 0, false)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
match := matches[0]
|
|
|
|
return b[match[0]:match[1]]
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindIndex returns a two-element slice of integers
|
|
|
|
// representing the location of the leftmost match of the
|
|
|
|
// regular expression.
|
|
|
|
func (r *Regexp) FindIndex(b []byte) []int {
|
|
|
|
matches, err := r.match(b, 0, false)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
match := matches[0]
|
|
|
|
|
|
|
|
return []int{int(match[0]), int(match[1])}
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAll returns all matches of the regular expression.
|
|
|
|
// A return value of nil indicates no match.
|
|
|
|
func (r *Regexp) FindAll(b []byte, n int) [][]byte {
|
|
|
|
matches, err := r.match(b, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 || n == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if n > 0 && len(matches) > n {
|
|
|
|
matches = matches[:n]
|
|
|
|
}
|
|
|
|
|
2022-05-20 10:09:55 +00:00
|
|
|
out := make([][]byte, len(matches))
|
|
|
|
for index, match := range matches {
|
|
|
|
out[index] = b[match[0]:match[1]]
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAll returns indices of all matches of the
|
|
|
|
// regular expression. A return value of nil indicates
|
|
|
|
// no match.
|
|
|
|
func (r *Regexp) FindAllIndex(b []byte, n int) [][]int {
|
|
|
|
matches, err := r.match(b, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 || n == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if n > 0 && len(matches) > n {
|
|
|
|
matches = matches[:n]
|
|
|
|
}
|
|
|
|
|
2022-05-20 10:09:55 +00:00
|
|
|
out := make([][]int, len(matches))
|
|
|
|
for index, match := range matches {
|
|
|
|
out[index] = []int{int(match[0]), int(match[1])}
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindSubmatch returns a slice containing the match as the
|
|
|
|
// first element, and the submatches as the subsequent elements.
|
|
|
|
func (r *Regexp) FindSubmatch(b []byte) [][]byte {
|
|
|
|
matches, err := r.match(b, 0, false)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
match := matches[0]
|
|
|
|
|
2022-05-20 10:09:55 +00:00
|
|
|
out := make([][]byte, 0, len(match)/2)
|
2022-05-18 23:28:40 +00:00
|
|
|
for i := 0; i < len(match); i += 2 {
|
2023-07-26 05:35:29 +00:00
|
|
|
if match[i] == Unset {
|
|
|
|
out = append(out, nil)
|
|
|
|
} else {
|
|
|
|
out = append(out, b[match[i]:match[i+1]])
|
|
|
|
}
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindSubmatchIndex returns a slice of index pairs representing
|
|
|
|
// the match and submatches, if any.
|
|
|
|
func (r *Regexp) FindSubmatchIndex(b []byte) []int {
|
|
|
|
matches, err := r.match(b, 0, false)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
match := matches[0]
|
|
|
|
|
|
|
|
out := make([]int, len(match))
|
|
|
|
for index, offset := range match {
|
|
|
|
out[index] = int(offset)
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAllSubmatch returns a slice of all matches and submatches
|
|
|
|
// of the regular expression. It will return no more than n matches.
|
|
|
|
// If n < 0, it will return all matches.
|
|
|
|
func (r *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
|
|
|
|
matches, err := r.match(b, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 || n == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if n > 0 && len(matches) > n {
|
|
|
|
matches = matches[:n]
|
|
|
|
}
|
|
|
|
|
|
|
|
out := make([][][]byte, len(matches))
|
|
|
|
for index, match := range matches {
|
2022-05-20 10:09:55 +00:00
|
|
|
outMatch := make([][]byte, 0, len(match)/2)
|
2022-05-18 23:28:40 +00:00
|
|
|
|
|
|
|
for i := 0; i < len(match); i += 2 {
|
2023-07-26 05:35:29 +00:00
|
|
|
if match[i] == Unset {
|
|
|
|
outMatch = append(outMatch, nil)
|
|
|
|
} else {
|
|
|
|
outMatch = append(outMatch, b[match[i]:match[i+1]])
|
|
|
|
}
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out[index] = outMatch
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAllSubmatch returns a slice of all indeces representing the
|
|
|
|
// locations of matches and submatches, if any, of the regular expression.
|
|
|
|
// It will return no more than n matches. If n < 0, it will return all matches.
|
|
|
|
func (r *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
|
|
|
|
matches, err := r.match(b, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 || n == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
if n > 0 && len(matches) > n {
|
|
|
|
matches = matches[:n]
|
|
|
|
}
|
|
|
|
|
|
|
|
out := make([][]int, len(matches))
|
|
|
|
for index, match := range matches {
|
|
|
|
offsets := make([]int, len(match))
|
|
|
|
|
|
|
|
for index, offset := range match {
|
|
|
|
offsets[index] = int(offset)
|
|
|
|
}
|
|
|
|
|
|
|
|
out[index] = offsets
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindString is the String version of Find
|
|
|
|
func (r *Regexp) FindString(s string) string {
|
|
|
|
return string(r.Find([]byte(s)))
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindStringIndex is the String version of FindIndex
|
|
|
|
func (r *Regexp) FindStringIndex(s string) []int {
|
|
|
|
return r.FindIndex([]byte(s))
|
|
|
|
}
|
|
|
|
|
|
|
|
// FinAllString is the String version of FindAll
|
|
|
|
func (r *Regexp) FindAllString(s string, n int) []string {
|
|
|
|
matches := r.FindAll([]byte(s), n)
|
2023-07-17 06:23:47 +00:00
|
|
|
|
2022-05-18 23:28:40 +00:00
|
|
|
out := make([]string, len(matches))
|
|
|
|
for index, match := range matches {
|
|
|
|
out[index] = string(match)
|
|
|
|
}
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAllStringIndex is the String version of FindIndex
|
|
|
|
func (r *Regexp) FindAllStringIndex(s string, n int) [][]int {
|
|
|
|
return r.FindAllIndex([]byte(s), n)
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindStringSubmatch is the string version of FindSubmatch
|
|
|
|
func (r *Regexp) FindStringSubmatch(s string) []string {
|
|
|
|
matches := r.FindSubmatch([]byte(s))
|
|
|
|
|
|
|
|
out := make([]string, len(matches))
|
|
|
|
for index, match := range matches {
|
|
|
|
out[index] = string(match)
|
|
|
|
}
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindStringSubmatchIndex is the String version of FindSubmatchIndex
|
|
|
|
func (r *Regexp) FindStringSubmatchIndex(s string) []int {
|
|
|
|
return r.FindSubmatchIndex([]byte(s))
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAllStringSubmatch is the String version of FindAllSubmatch
|
|
|
|
func (r *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
|
|
|
|
matches := r.FindAllSubmatch([]byte(s), n)
|
|
|
|
|
|
|
|
out := make([][]string, len(matches))
|
|
|
|
for index, match := range matches {
|
|
|
|
outMatch := make([]string, len(match))
|
|
|
|
|
|
|
|
for index, byteMatch := range match {
|
|
|
|
outMatch[index] = string(byteMatch)
|
|
|
|
}
|
|
|
|
|
|
|
|
out[index] = outMatch
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// FindAllStringSubmatchIndex is the String version of FindAllSubmatchIndex
|
|
|
|
func (r *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
|
|
|
|
return r.FindAllSubmatchIndex([]byte(s), n)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Match reports whether b contains a match of the regular expression
|
|
|
|
func (r *Regexp) Match(b []byte) bool {
|
|
|
|
return r.Find(b) != nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// MatchString is the String version of Match
|
|
|
|
func (r *Regexp) MatchString(s string) bool {
|
|
|
|
return r.Find([]byte(s)) != nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NumSubexp returns the number of parenthesized subexpressions
|
|
|
|
// in the regular expression.
|
|
|
|
func (r *Regexp) NumSubexp() int {
|
|
|
|
return int(r.patternInfo(lib.DPCRE2_INFO_CAPTURECOUNT))
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplaceAll returns a copy of src, replacing matches of the
|
|
|
|
// regular expression with the replacement text repl.
|
|
|
|
// Inside repl, $ signs are interpreted as in Expand,
|
|
|
|
// so for instance $1 represents the text of the first
|
2022-05-21 18:45:30 +00:00
|
|
|
// submatch and $name would represent the text of the
|
|
|
|
// subexpression called "name".
|
2022-05-18 23:28:40 +00:00
|
|
|
func (r *Regexp) ReplaceAll(src, repl []byte) []byte {
|
|
|
|
matches, err := r.match(src, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
2022-05-21 18:56:35 +00:00
|
|
|
return src
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out := make([]byte, len(src))
|
|
|
|
copy(out, src)
|
|
|
|
|
|
|
|
var diff int64
|
|
|
|
for _, match := range matches {
|
|
|
|
replStr := os.Expand(string(repl), func(s string) string {
|
|
|
|
i, err := strconv.Atoi(s)
|
|
|
|
if err != nil {
|
2022-05-21 18:45:30 +00:00
|
|
|
i = r.SubexpIndex(s)
|
|
|
|
if i == -1 {
|
|
|
|
return ""
|
|
|
|
}
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// If there given match does not exist, return empty string
|
|
|
|
if i == 0 || len(match) < (2*i)+1 {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return match
|
|
|
|
return string(src[match[2*i]:match[(2*i)+1]])
|
|
|
|
})
|
|
|
|
// Replace replacement string with expanded string
|
|
|
|
repl := []byte(replStr)
|
|
|
|
|
|
|
|
// Replace bytes with new replacement string
|
|
|
|
diff, out = replaceBytes(out, repl, match[0], match[1], diff)
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplaceAllFunc returns a copy of src in which all matches of the
|
|
|
|
// regular expression have been replaced by the return value of function
|
|
|
|
// repl applied to the matched byte slice. The replacement returned by
|
|
|
|
// repl is substituted directly, without using Expand.
|
|
|
|
func (r *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
|
|
|
|
matches, err := r.match(src, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
2022-05-21 18:56:35 +00:00
|
|
|
return src
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out := make([]byte, len(src))
|
|
|
|
copy(out, src)
|
|
|
|
|
|
|
|
var diff int64
|
|
|
|
for _, match := range matches {
|
|
|
|
replBytes := repl(src[match[0]:match[1]])
|
|
|
|
diff, out = replaceBytes(out, replBytes, match[0], match[1], diff)
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplaceAllLiteral returns a copy of src, replacing matches of
|
|
|
|
// the regular expression with the replacement bytes repl.
|
|
|
|
// The replacement is substituted directly, without using Expand.
|
|
|
|
func (r *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
|
|
|
|
matches, err := r.match(src, 0, true)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
if len(matches) == 0 {
|
2022-05-21 18:56:35 +00:00
|
|
|
return src
|
2022-05-18 23:28:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out := make([]byte, len(src))
|
|
|
|
copy(out, src)
|
|
|
|
|
|
|
|
var diff int64
|
|
|
|
for _, match := range matches {
|
|
|
|
diff, out = replaceBytes(out, repl, match[0], match[1], diff)
|
|
|
|
}
|
|
|
|
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplaceAllString is the String version of ReplaceAll
|
|
|
|
func (r *Regexp) ReplaceAllString(src, repl string) string {
|
|
|
|
return string(r.ReplaceAll([]byte(src), []byte(repl)))
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplaceAllStringFunc is the String version of ReplaceAllFunc
|
|
|
|
func (r *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
|
|
|
|
return string(r.ReplaceAllFunc([]byte(src), func(b []byte) []byte {
|
|
|
|
return []byte(repl(string(b)))
|
|
|
|
}))
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplaceAllLiteralString is the String version of ReplaceAllLiteral
|
|
|
|
func (r *Regexp) ReplaceAllLiteralString(src, repl string) string {
|
|
|
|
return string(r.ReplaceAllLiteral([]byte(src), []byte(repl)))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Split slices s into substrings separated by the
|
|
|
|
// expression and returns a slice of the substrings
|
|
|
|
// between those expression matches.
|
|
|
|
//
|
|
|
|
// Example:
|
2023-07-17 06:23:47 +00:00
|
|
|
//
|
2022-05-18 23:28:40 +00:00
|
|
|
// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
|
|
|
|
// // s: ["", "b", "b", "c", "cadaaae"]
|
2023-07-17 06:23:47 +00:00
|
|
|
//
|
2022-05-18 23:28:40 +00:00
|
|
|
// The count determines the number of substrings to return:
|
2023-07-17 06:23:47 +00:00
|
|
|
//
|
2022-05-18 23:28:40 +00:00
|
|
|
// n > 0: at most n substrings; the last substring will be the unsplit remainder.
|
|
|
|
// n == 0: the result is nil (zero substrings)
|
|
|
|
// n < 0: all substrings
|
|
|
|
func (r *Regexp) Split(s string, n int) []string {
|
|
|
|
if n == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(r.expr) > 0 && len(s) == 0 {
|
|
|
|
return []string{""}
|
|
|
|
}
|
|
|
|
|
|
|
|
matches := r.FindAllStringIndex(s, n)
|
|
|
|
strings := make([]string, 0, len(matches))
|
|
|
|
|
|
|
|
beg := 0
|
|
|
|
end := 0
|
|
|
|
for _, match := range matches {
|
|
|
|
if n > 0 && len(strings) >= n-1 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
end = match[0]
|
|
|
|
if match[1] != 0 {
|
|
|
|
strings = append(strings, s[beg:end])
|
|
|
|
}
|
|
|
|
beg = match[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
if end != len(s) {
|
|
|
|
strings = append(strings, s[beg:])
|
|
|
|
}
|
|
|
|
|
|
|
|
return strings
|
|
|
|
}
|
|
|
|
|
|
|
|
// String returns the text of the regular expression
|
|
|
|
// used for compilation.
|
|
|
|
func (r *Regexp) String() string {
|
|
|
|
return r.expr
|
|
|
|
}
|
|
|
|
|
|
|
|
// SubexpIndex returns the index of the subexpression
|
|
|
|
// with the given name, or -1 if there is no subexpression
|
|
|
|
// with that name.
|
|
|
|
func (r *Regexp) SubexpIndex(name string) int {
|
|
|
|
r.mtx.Lock()
|
|
|
|
defer r.mtx.Unlock()
|
|
|
|
|
|
|
|
// Get C string of name
|
|
|
|
cName, err := libc.CString(name)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get substring index from name
|
|
|
|
ret := lib.Xpcre2_substring_number_from_name_8(r.tls, r.re, cName)
|
|
|
|
|
|
|
|
// If no substring error returned, return -1.
|
|
|
|
// If a different error is returned, panic.
|
|
|
|
if ret == lib.DPCRE2_ERROR_NOSUBSTRING {
|
|
|
|
return -1
|
|
|
|
} else if ret < 0 {
|
|
|
|
panic(codeToError(r.tls, ret))
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the index of the subexpression
|
|
|
|
return int(ret)
|
|
|
|
}
|
|
|
|
|
2023-07-17 06:27:57 +00:00
|
|
|
// SetCallout sets a callout function that will be called at specified points in the matching operation.
|
|
|
|
// fn should return zero if it ran successfully or a non-zero integer to force an error.
|
|
|
|
// See https://www.pcre.org/current/doc/html/pcre2callout.html for more information.
|
2023-07-17 06:23:47 +00:00
|
|
|
func (r *Regexp) SetCallout(fn func(cb *CalloutBlock) int32) error {
|
|
|
|
cfn := func(tls *libc.TLS, cbptr, data uintptr) int32 {
|
|
|
|
ccb := (*lib.Tpcre2_callout_block_8)(unsafe.Pointer(cbptr))
|
|
|
|
|
|
|
|
cb := &CalloutBlock{
|
|
|
|
Version: ccb.Fversion,
|
|
|
|
CalloutNumber: ccb.Fcallout_number,
|
|
|
|
CaptureTop: ccb.Fcapture_top,
|
|
|
|
CaptureLast: ccb.Fcapture_last,
|
|
|
|
Mark: libc.GoString(ccb.Fmark),
|
|
|
|
StartMatch: uint(ccb.Fstart_match),
|
|
|
|
CurrentPosition: uint(ccb.Fcurrent_position),
|
|
|
|
PatternPosition: uint(ccb.Fpattern_position),
|
|
|
|
NextItemLength: uint(ccb.Fnext_item_length),
|
|
|
|
CalloutStringOffset: uint(ccb.Fcallout_string_offset),
|
|
|
|
CalloutFlags: CalloutFlags(ccb.Fcallout_flags),
|
|
|
|
}
|
|
|
|
|
|
|
|
subjectBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fsubject)), ccb.Fsubject_length)
|
|
|
|
cb.Subject = string(subjectBytes)
|
|
|
|
|
|
|
|
calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length)
|
|
|
|
cb.CalloutString = string(calloutStrBytes)
|
|
|
|
|
2023-07-17 14:11:35 +00:00
|
|
|
ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), (ccb.Fcapture_top*2)-1)
|
|
|
|
if len(ovecSlice) > 2 {
|
|
|
|
ovecSlice = ovecSlice[2:]
|
|
|
|
for i := 0; i < len(ovecSlice); i += 2 {
|
|
|
|
if i+1 >= len(ovecSlice) {
|
|
|
|
cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:])
|
|
|
|
} else {
|
|
|
|
cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:ovecSlice[i+1]])
|
|
|
|
}
|
2023-07-17 06:23:47 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-17 06:33:46 +00:00
|
|
|
return fn(cb)
|
2023-07-17 06:23:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
r.calloutMtx.Lock()
|
|
|
|
defer r.calloutMtx.Unlock()
|
2023-07-17 06:32:06 +00:00
|
|
|
|
|
|
|
// Prevent callout function from being GC'd
|
2023-07-17 06:23:47 +00:00
|
|
|
r.callout = &cfn
|
|
|
|
|
|
|
|
ret := lib.Xpcre2_set_callout_8(r.tls, r.mctx, *(*uintptr)(unsafe.Pointer(&cfn)), 0)
|
|
|
|
if ret < 0 {
|
|
|
|
return codeToError(r.tls, ret)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-05-18 23:28:40 +00:00
|
|
|
// replaceBytes replaces the bytes at a given location, and returns a new
|
|
|
|
// offset, based on how much bigger or smaller the slice got after replacement
|
|
|
|
func replaceBytes(src, repl []byte, sOff, eOff lib.Tsize_t, diff int64) (int64, []byte) {
|
|
|
|
var out []byte
|
|
|
|
out = append(
|
|
|
|
src[:int64(sOff)+diff],
|
|
|
|
append(
|
|
|
|
repl,
|
|
|
|
src[int64(eOff)+diff:]...,
|
|
|
|
)...,
|
|
|
|
)
|
|
|
|
|
|
|
|
return diff + int64(len(out)-len(src)), out
|
|
|
|
}
|
|
|
|
|
|
|
|
// match calls the underlying pcre match functions. It re-runs the functions
|
|
|
|
// until no matches are found if multi is set to true.
|
|
|
|
func (r *Regexp) match(b []byte, options uint32, multi bool) ([][]lib.Tsize_t, error) {
|
2022-05-23 09:40:16 +00:00
|
|
|
if len(b) == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
2023-07-17 06:23:47 +00:00
|
|
|
|
2022-05-18 23:28:40 +00:00
|
|
|
r.mtx.Lock()
|
|
|
|
defer r.mtx.Unlock()
|
|
|
|
|
|
|
|
// Create a C pointer to the subject
|
|
|
|
sp := unsafe.Pointer(&b[0])
|
|
|
|
cSubject := uintptr(sp)
|
|
|
|
// Convert the size of the subject to a C size_t type
|
|
|
|
cSubjectLen := lib.Tsize_t(len(b))
|
|
|
|
|
|
|
|
// Create match data using the pattern to figure out the buffer size
|
|
|
|
md := lib.Xpcre2_match_data_create_from_pattern_8(r.tls, r.re, 0)
|
|
|
|
if md == 0 {
|
|
|
|
panic("error creating match data")
|
|
|
|
}
|
|
|
|
// Free the match data at the end of the function
|
|
|
|
defer lib.Xpcre2_match_data_free_8(r.tls, md)
|
|
|
|
|
|
|
|
var offset lib.Tsize_t
|
|
|
|
var out [][]lib.Tsize_t
|
|
|
|
// While the offset is less than the length of the subject
|
|
|
|
for offset < cSubjectLen {
|
|
|
|
// Execute expression on subject
|
2023-07-17 06:23:47 +00:00
|
|
|
ret := lib.Xpcre2_match_8(r.tls, r.re, cSubject, cSubjectLen, offset, options, md, r.mctx)
|
2022-05-18 23:28:40 +00:00
|
|
|
if ret < 0 {
|
|
|
|
// If no match found, break
|
|
|
|
if ret == lib.DPCRE2_ERROR_NOMATCH {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, codeToError(r.tls, ret)
|
|
|
|
} else {
|
|
|
|
// Get amount of pairs in output vector
|
|
|
|
pairAmt := lib.Xpcre2_get_ovector_count_8(r.tls, md)
|
|
|
|
// Get pointer to output vector
|
|
|
|
ovec := lib.Xpcre2_get_ovector_pointer_8(r.tls, md)
|
|
|
|
// Create a Go slice using the output vector as the underlying array
|
|
|
|
slice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ovec)), pairAmt*2)
|
|
|
|
|
|
|
|
// Create a new slice and copy the elements from the slice
|
|
|
|
// This is required because the match data will be freed in
|
|
|
|
// a defer, and that would cause a panic every time the slice
|
|
|
|
// is used later.
|
|
|
|
matches := make([]lib.Tsize_t, len(slice))
|
|
|
|
copy(matches, slice)
|
|
|
|
|
|
|
|
// If the two indices are the same (empty string), and the match is not
|
|
|
|
// immediately after another match, add it to the output and increment the
|
|
|
|
// offset. Otherwise, increment the offset and ignore the match.
|
|
|
|
if slice[0] == slice[1] && len(out) > 0 && slice[0] != out[len(out)-1][1] {
|
|
|
|
out = append(out, matches)
|
|
|
|
offset = slice[1] + 1
|
|
|
|
continue
|
|
|
|
} else if slice[0] == slice[1] {
|
|
|
|
offset = slice[1] + 1
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add the match to the output
|
|
|
|
out = append(out, matches)
|
|
|
|
// Set the next offset to the end index of the match
|
|
|
|
offset = matches[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
// If multiple matches disabled, break
|
|
|
|
if !multi {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return out, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// patternInfo calls the underlying pcre pattern info function
|
|
|
|
// and returns information about the compiled regular expression
|
|
|
|
func (r *Regexp) patternInfo(what uint32) (out uint32) {
|
|
|
|
// Create a C pointer to the output integer
|
|
|
|
cOut := uintptr(unsafe.Pointer(&out))
|
|
|
|
// Get information about the compiled pattern
|
|
|
|
lib.Xpcre2_pattern_info_8(r.tls, r.re, what, cOut)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close frees resources used by the regular expression.
|
|
|
|
func (r *Regexp) Close() error {
|
|
|
|
if r == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close thread-local storage
|
|
|
|
defer r.tls.Close()
|
|
|
|
|
|
|
|
// Free the compiled code
|
|
|
|
lib.Xpcre2_code_free_8(r.tls, r.re)
|
2023-07-17 06:23:47 +00:00
|
|
|
// Free the match context
|
|
|
|
lib.Xpcre2_match_context_free_8(r.tls, r.mctx)
|
2022-05-18 23:28:40 +00:00
|
|
|
// Set regular expression to null
|
|
|
|
r.re = 0
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|