Compare commits
11 Commits
af322c4a52
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 4ce849193f | |||
| 107b5db1fb | |||
| e90cc6feac | |||
| 18912728e9 | |||
| 0a0008aef4 | |||
| 6d906d55a6 | |||
| d1b9df80a1 | |||
| bde850752d | |||
| bafc40da8a | |||
| c6fb3e8489 | |||
| 23df260dfd |
13
.woodpecker.yml
Normal file
13
.woodpecker.yml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
matrix:
|
||||||
|
platform:
|
||||||
|
- linux/amd64
|
||||||
|
- linux/arm64
|
||||||
|
- linux/riscv64
|
||||||
|
|
||||||
|
steps:
|
||||||
|
test:
|
||||||
|
image: gitea.elara.ws/elara6331/golang:latest
|
||||||
|
commands:
|
||||||
|
- go test
|
||||||
|
when:
|
||||||
|
- event: push
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
# pcre
|
# pcre
|
||||||
|
|
||||||
[Go Reference](https://pkg.go.dev/go.arsenm.dev/pcre)
|
[Go Reference](https://pkg.go.dev/go.elara.ws/pcre)
|
||||||
|
[CI status](https://ci.elara.ws/49)
|
||||||
|
|
||||||
This package provides a CGo-free port of the PCRE2 regular expression library. The [lib](lib) directory contains source code automatically translated from PCRE2's C source. This package wraps that code and provides an interface as close as possible to Go's stdlib [regexp](https://pkg.go.dev/regexp) package.
|
This package provides a CGo-free port of the PCRE2 regular expression library. The [lib](lib) directory contains source code automatically translated from PCRE2's C source. This package wraps that code and provides an interface as close as possible to Go's stdlib [regexp](https://pkg.go.dev/regexp) package.
|
||||||
|
|
||||||
|
|||||||
@@ -39,17 +39,17 @@ func TestCompileGlob(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestGlob(t *testing.T) {
|
func TestGlob(t *testing.T) {
|
||||||
err := os.MkdirAll("pcretest/dir1", 0755)
|
err := os.MkdirAll("pcretest/dir1", 0o755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = os.MkdirAll("pcretest/dir2", 0755)
|
err = os.MkdirAll("pcretest/dir2", 0o755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = os.MkdirAll("pcretest/test1/dir4", 0755)
|
err = os.MkdirAll("pcretest/test1/dir4", 0o755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -113,7 +113,7 @@ func TestGlob(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func touch(path string) error {
|
func touch(path string) error {
|
||||||
fl, err := os.OpenFile(path, os.O_CREATE, 0644)
|
fl, err := os.OpenFile(path, os.O_CREATE, 0o644)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
91
pcre.go
91
pcre.go
@@ -8,6 +8,7 @@
|
|||||||
package pcre
|
package pcre
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -19,6 +20,8 @@ import (
|
|||||||
"modernc.org/libc"
|
"modernc.org/libc"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const Unset = math.MaxUint
|
||||||
|
|
||||||
// Version returns the version of pcre2 embedded in this library.
|
// Version returns the version of pcre2 embedded in this library.
|
||||||
func Version() string { return lib.DPACKAGE_VERSION }
|
func Version() string { return lib.DPACKAGE_VERSION }
|
||||||
|
|
||||||
@@ -208,7 +211,11 @@ func (r *Regexp) FindSubmatch(b []byte) [][]byte {
|
|||||||
|
|
||||||
out := make([][]byte, 0, len(match)/2)
|
out := make([][]byte, 0, len(match)/2)
|
||||||
for i := 0; i < len(match); i += 2 {
|
for i := 0; i < len(match); i += 2 {
|
||||||
out = append(out, b[match[i]:match[i+1]])
|
if match[i] == Unset {
|
||||||
|
out = append(out, nil)
|
||||||
|
} else {
|
||||||
|
out = append(out, b[match[i]:match[i+1]])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
@@ -253,7 +260,11 @@ func (r *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
|
|||||||
outMatch := make([][]byte, 0, len(match)/2)
|
outMatch := make([][]byte, 0, len(match)/2)
|
||||||
|
|
||||||
for i := 0; i < len(match); i += 2 {
|
for i := 0; i < len(match); i += 2 {
|
||||||
outMatch = append(outMatch, b[match[i]:match[i+1]])
|
if match[i] == Unset {
|
||||||
|
outMatch = append(outMatch, nil)
|
||||||
|
} else {
|
||||||
|
outMatch = append(outMatch, b[match[i]:match[i+1]])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out[index] = outMatch
|
out[index] = outMatch
|
||||||
@@ -565,67 +576,6 @@ func (r *Regexp) SubexpIndex(name string) int {
|
|||||||
return int(ret)
|
return int(ret)
|
||||||
}
|
}
|
||||||
|
|
||||||
type CalloutFlags uint32
|
|
||||||
|
|
||||||
const (
|
|
||||||
CalloutStartMatch = CalloutFlags(lib.DPCRE2_CALLOUT_STARTMATCH)
|
|
||||||
CalloutBacktrack = CalloutFlags(lib.DPCRE2_CALLOUT_BACKTRACK)
|
|
||||||
)
|
|
||||||
|
|
||||||
type CalloutBlock struct {
|
|
||||||
// Version contains the version number of the block format.
|
|
||||||
// The current version is 2.
|
|
||||||
Version uint32
|
|
||||||
|
|
||||||
// CalloutNumber contains the number of the callout, in the range 0-255.
|
|
||||||
// This is the number that follows "?C". For callouts with string arguments,
|
|
||||||
// this will always be zero.
|
|
||||||
CalloutNumber uint32
|
|
||||||
|
|
||||||
// CaptureTop contains the number of the highest numbered substring
|
|
||||||
// captured so far plus one. If no substrings have yet been captured,
|
|
||||||
// CaptureTop will be set to 1.
|
|
||||||
CaptureTop uint32
|
|
||||||
|
|
||||||
// CaptureLast contains the number of the last substring that was captured.
|
|
||||||
CaptureLast uint32
|
|
||||||
|
|
||||||
// Substrings contains all of the substrings captured so far.
|
|
||||||
Substrings []string
|
|
||||||
|
|
||||||
Mark string
|
|
||||||
|
|
||||||
// Subject contains the string passed to the match function.
|
|
||||||
Subject string
|
|
||||||
|
|
||||||
// StartMatch contains the offset within the subject at which the current match attempt started.
|
|
||||||
StartMatch uint
|
|
||||||
|
|
||||||
// CurrentPosition contains the offset of the current match pointer within the subject.
|
|
||||||
CurrentPosition uint
|
|
||||||
|
|
||||||
// PatternPosition contains the offset within the pattern string to the next item to be matched.
|
|
||||||
PatternPosition uint
|
|
||||||
|
|
||||||
// NextItemLength contains the length of the next item to be processed in the pattern string.
|
|
||||||
NextItemLength uint
|
|
||||||
|
|
||||||
// CalloutStringOffset contains the code unit offset to the start of the callout argument string within the original pattern string.
|
|
||||||
CalloutStringOffset uint
|
|
||||||
|
|
||||||
// CalloutString is the string for the callout. For numerical callouts, this will always be empty.
|
|
||||||
CalloutString string
|
|
||||||
|
|
||||||
// CalloutFlags contains the following flags:
|
|
||||||
// CalloutStartMatch
|
|
||||||
// This is set for the first callout after the start of matching for each new starting position in the subject.
|
|
||||||
// CalloutBacktrack
|
|
||||||
// This is set if there has been a matching backtrack since the previous callout, or since the start of matching if this is the first callout from a pcre2_match() run.
|
|
||||||
//
|
|
||||||
// Both bits are set when a backtrack has caused a "bumpalong" to a new starting position in the subject. Output
|
|
||||||
CalloutFlags CalloutFlags
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetCallout sets a callout function that will be called at specified points in the matching operation.
|
// SetCallout sets a callout function that will be called at specified points in the matching operation.
|
||||||
// fn should return zero if it ran successfully or a non-zero integer to force an error.
|
// fn should return zero if it ran successfully or a non-zero integer to force an error.
|
||||||
// See https://www.pcre.org/current/doc/html/pcre2callout.html for more information.
|
// See https://www.pcre.org/current/doc/html/pcre2callout.html for more information.
|
||||||
@@ -653,12 +603,15 @@ func (r *Regexp) SetCallout(fn func(cb *CalloutBlock) int32) error {
|
|||||||
calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length)
|
calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length)
|
||||||
cb.CalloutString = string(calloutStrBytes)
|
cb.CalloutString = string(calloutStrBytes)
|
||||||
|
|
||||||
ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), (ccb.Fcapture_top*2)-1)[2:]
|
ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), (ccb.Fcapture_top*2)-1)
|
||||||
for i := 0; i < len(ovecSlice); i += 2 {
|
if len(ovecSlice) > 2 {
|
||||||
if i+1 >= len(ovecSlice) {
|
ovecSlice = ovecSlice[2:]
|
||||||
cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:])
|
for i := 0; i < len(ovecSlice); i += 2 {
|
||||||
} else {
|
if i+1 >= len(ovecSlice) {
|
||||||
cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:ovecSlice[i+1]])
|
cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:])
|
||||||
|
} else {
|
||||||
|
cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:ovecSlice[i+1]])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"go.elara.ws/pcre"
|
"go.elara.ws/pcre"
|
||||||
)
|
)
|
||||||
@@ -289,3 +290,11 @@ func TestCallout(t *testing.T) {
|
|||||||
t.Error("expected regular expression to match the string")
|
t.Error("expected regular expression to match the string")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestVarnish(t *testing.T) {
|
||||||
|
regex := pcre.MustCompile(`varnish(?: \(Varnish\/([\d.]{1,250})\))?`)
|
||||||
|
matches := regex.FindStringSubmatch("1.1 varnish")
|
||||||
|
if !reflect.DeepEqual(matches, []string{"varnish", ""}) {
|
||||||
|
t.Errorf(`Expected ["varnish" ""], got %q`, matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
120
types.go
120
types.go
@@ -6,33 +6,95 @@ type CompileOption uint32
|
|||||||
|
|
||||||
// Compile option bits
|
// Compile option bits
|
||||||
const (
|
const (
|
||||||
Anchored = CompileOption(lib.DPCRE2_ANCHORED)
|
Anchored = CompileOption(lib.DPCRE2_ANCHORED)
|
||||||
AllowEmptyClass = CompileOption(lib.DPCRE2_ALLOW_EMPTY_CLASS)
|
AllowEmptyClass = CompileOption(lib.DPCRE2_ALLOW_EMPTY_CLASS)
|
||||||
AltBsux = CompileOption(lib.DPCRE2_ALT_BSUX)
|
AltBsux = CompileOption(lib.DPCRE2_ALT_BSUX)
|
||||||
AltCircumflex = CompileOption(lib.DPCRE2_ALT_CIRCUMFLEX)
|
AltCircumflex = CompileOption(lib.DPCRE2_ALT_CIRCUMFLEX)
|
||||||
AltVerbnames = CompileOption(lib.DPCRE2_ALT_VERBNAMES)
|
AltVerbnames = CompileOption(lib.DPCRE2_ALT_VERBNAMES)
|
||||||
AutoCallout = CompileOption(lib.DPCRE2_AUTO_CALLOUT)
|
AutoCallout = CompileOption(lib.DPCRE2_AUTO_CALLOUT)
|
||||||
Caseless = CompileOption(lib.DPCRE2_CASELESS)
|
Caseless = CompileOption(lib.DPCRE2_CASELESS)
|
||||||
DollarEndOnly = CompileOption(lib.DPCRE2_DOLLAR_ENDONLY)
|
DollarEndOnly = CompileOption(lib.DPCRE2_DOLLAR_ENDONLY)
|
||||||
DotAll = CompileOption(lib.DPCRE2_DOTALL)
|
DotAll = CompileOption(lib.DPCRE2_DOTALL)
|
||||||
DupNames = CompileOption(lib.DPCRE2_DUPNAMES)
|
DupNames = CompileOption(lib.DPCRE2_DUPNAMES)
|
||||||
EndAnchored = CompileOption(lib.DPCRE2_ENDANCHORED)
|
EndAnchored = CompileOption(lib.DPCRE2_ENDANCHORED)
|
||||||
Extended = CompileOption(lib.DPCRE2_EXTENDED)
|
Extended = CompileOption(lib.DPCRE2_EXTENDED)
|
||||||
FirstLine = CompileOption(lib.DPCRE2_FIRSTLINE)
|
FirstLine = CompileOption(lib.DPCRE2_FIRSTLINE)
|
||||||
Literal = CompileOption(lib.DPCRE2_LITERAL)
|
Literal = CompileOption(lib.DPCRE2_LITERAL)
|
||||||
MatchInvalidUTF = CompileOption(lib.DPCRE2_MATCH_INVALID_UTF)
|
MatchInvalidUTF = CompileOption(lib.DPCRE2_MATCH_INVALID_UTF)
|
||||||
MactchUnsetBackref = CompileOption(lib.DPCRE2_MATCH_UNSET_BACKREF)
|
MatchUnsetBackref = CompileOption(lib.DPCRE2_MATCH_UNSET_BACKREF)
|
||||||
Multiline = CompileOption(lib.DPCRE2_MULTILINE)
|
Multiline = CompileOption(lib.DPCRE2_MULTILINE)
|
||||||
NeverBackslashC = CompileOption(lib.DPCRE2_NEVER_BACKSLASH_C)
|
NeverBackslashC = CompileOption(lib.DPCRE2_NEVER_BACKSLASH_C)
|
||||||
NeverUCP = CompileOption(lib.DPCRE2_NEVER_UCP)
|
NeverUCP = CompileOption(lib.DPCRE2_NEVER_UCP)
|
||||||
NeverUTF = CompileOption(lib.DPCRE2_NEVER_UTF)
|
NeverUTF = CompileOption(lib.DPCRE2_NEVER_UTF)
|
||||||
NoAutoCapture = CompileOption(lib.DPCRE2_NO_AUTO_CAPTURE)
|
NoAutoCapture = CompileOption(lib.DPCRE2_NO_AUTO_CAPTURE)
|
||||||
NoAutoPossess = CompileOption(lib.DPCRE2_NO_AUTO_POSSESS)
|
NoAutoPossess = CompileOption(lib.DPCRE2_NO_AUTO_POSSESS)
|
||||||
NoDotStarAnchor = CompileOption(lib.DPCRE2_NO_DOTSTAR_ANCHOR)
|
NoDotStarAnchor = CompileOption(lib.DPCRE2_NO_DOTSTAR_ANCHOR)
|
||||||
NoStartOptimize = CompileOption(lib.DPCRE2_NO_START_OPTIMIZE)
|
NoStartOptimize = CompileOption(lib.DPCRE2_NO_START_OPTIMIZE)
|
||||||
NoUTFCheck = CompileOption(lib.DPCRE2_NO_UTF_CHECK)
|
NoUTFCheck = CompileOption(lib.DPCRE2_NO_UTF_CHECK)
|
||||||
UCP = CompileOption(lib.DPCRE2_UCP)
|
UCP = CompileOption(lib.DPCRE2_UCP)
|
||||||
Ungreedy = CompileOption(lib.DPCRE2_UNGREEDY)
|
Ungreedy = CompileOption(lib.DPCRE2_UNGREEDY)
|
||||||
UseOffsetLimit = CompileOption(lib.DPCRE2_USE_OFFSET_LIMIT)
|
UseOffsetLimit = CompileOption(lib.DPCRE2_USE_OFFSET_LIMIT)
|
||||||
UTF = CompileOption(lib.DPCRE2_UTF)
|
UTF = CompileOption(lib.DPCRE2_UTF)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type CalloutFlags uint32
|
||||||
|
|
||||||
|
const (
|
||||||
|
CalloutStartMatch = CalloutFlags(lib.DPCRE2_CALLOUT_STARTMATCH)
|
||||||
|
CalloutBacktrack = CalloutFlags(lib.DPCRE2_CALLOUT_BACKTRACK)
|
||||||
|
)
|
||||||
|
|
||||||
|
// CalloutBlock contains the data passed to callout functions
|
||||||
|
type CalloutBlock struct {
|
||||||
|
// Version contains the version number of the block format.
|
||||||
|
// The current version is 2.
|
||||||
|
Version uint32
|
||||||
|
|
||||||
|
// CalloutNumber contains the number of the callout, in the range 0-255.
|
||||||
|
// This is the number that follows "?C". For callouts with string arguments,
|
||||||
|
// this will always be zero.
|
||||||
|
CalloutNumber uint32
|
||||||
|
|
||||||
|
// CaptureTop contains the number of the highest numbered substring
|
||||||
|
// captured so far plus one. If no substrings have yet been captured,
|
||||||
|
// CaptureTop will be set to 1.
|
||||||
|
CaptureTop uint32
|
||||||
|
|
||||||
|
// CaptureLast contains the number of the last substring that was captured.
|
||||||
|
CaptureLast uint32
|
||||||
|
|
||||||
|
// Substrings contains all of the substrings captured so far.
|
||||||
|
Substrings []string
|
||||||
|
|
||||||
|
Mark string
|
||||||
|
|
||||||
|
// Subject contains the string passed to the match function.
|
||||||
|
Subject string
|
||||||
|
|
||||||
|
// StartMatch contains the offset within the subject at which the current match attempt started.
|
||||||
|
StartMatch uint
|
||||||
|
|
||||||
|
// CurrentPosition contains the offset of the current match pointer within the subject.
|
||||||
|
CurrentPosition uint
|
||||||
|
|
||||||
|
// PatternPosition contains the offset within the pattern string to the next item to be matched.
|
||||||
|
PatternPosition uint
|
||||||
|
|
||||||
|
// NextItemLength contains the length of the next item to be processed in the pattern string.
|
||||||
|
NextItemLength uint
|
||||||
|
|
||||||
|
// CalloutStringOffset contains the code unit offset to the start of the callout argument string within the original pattern string.
|
||||||
|
CalloutStringOffset uint
|
||||||
|
|
||||||
|
// CalloutString is the string for the callout. For numerical callouts, this will always be empty.
|
||||||
|
CalloutString string
|
||||||
|
|
||||||
|
// CalloutFlags contains the following flags:
|
||||||
|
// CalloutStartMatch
|
||||||
|
// This is set for the first callout after the start of matching for each new starting position in the subject.
|
||||||
|
// CalloutBacktrack
|
||||||
|
// This is set if there has been a matching backtrack since the previous callout, or since the start of matching if this is the first callout from a pcre2_match() run.
|
||||||
|
//
|
||||||
|
// Both bits are set when a backtrack has caused a "bumpalong" to a new starting position in the subject.
|
||||||
|
CalloutFlags CalloutFlags
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user