Compare commits

..

1 Commits

Author SHA1 Message Date
54d8a33099 recompute ovecSlice
The first two entries are always PCRE2_UNSET, then there's 2*(capture_top-1). At least that's how I read https://www.pcre.org/current/doc/html/pcre2callout.html
2023-07-23 18:02:22 +00:00
5 changed files with 34 additions and 68 deletions

View File

@ -1,13 +0,0 @@
matrix:
platform:
- linux/amd64
- linux/arm64
- linux/riscv64
steps:
test:
image: gitea.elara.ws/elara6331/golang:latest
commands:
- go test
when:
- event: push

View File

@ -1,7 +1,6 @@
# pcre # pcre
[![Go Reference](https://pkg.go.dev/badge/go.elara.ws/pcre.svg)](https://pkg.go.dev/go.elara.ws/pcre) [![Go Reference](https://pkg.go.dev/badge/go.elara.ws/pcre.svg)](https://pkg.go.dev/go.elara.ws/pcre)
[![status-badge](https://ci.elara.ws/api/badges/49/status.svg)](https://ci.elara.ws/49)
This package provides a CGo-free port of the PCRE2 regular expression library. The [lib](lib) directory contains source code automatically translated from PCRE2's C source. This package wraps that code and provides an interface as close as possible to Go's stdlib [regexp](https://pkg.go.dev/regexp) package. This package provides a CGo-free port of the PCRE2 regular expression library. The [lib](lib) directory contains source code automatically translated from PCRE2's C source. This package wraps that code and provides an interface as close as possible to Go's stdlib [regexp](https://pkg.go.dev/regexp) package.
@ -51,4 +50,4 @@ CC=/usr/bin/gcc ccgo -o pcre2_<os>_<arch>.go -pkgname lib -trace-translation-uni
- If cross-compiling, set the `CCGO_CC` variable to the path of the cross-compiler, and the `CCGO_AR` variable to the path of the cross-compiler's `ar` binary. Also, set `TARGET_GOARCH` to the GOARCH you're targeting and `TARGET_GOOS` to the OS you're targeting. - If cross-compiling, set the `CCGO_CC` variable to the path of the cross-compiler, and the `CCGO_AR` variable to the path of the cross-compiler's `ar` binary. Also, set `TARGET_GOARCH` to the GOARCH you're targeting and `TARGET_GOOS` to the OS you're targeting.
- Once the command completes, two go files will be created. One will start with `pcre2`, the other with `capi`. Copy both of these to the `lib` directory in this repo. - Once the command completes, two go files will be created. One will start with `pcre2`, the other with `capi`. Copy both of these to the `lib` directory in this repo.

19
pcre.go
View File

@ -8,7 +8,6 @@
package pcre package pcre
import ( import (
"math"
"os" "os"
"runtime" "runtime"
"strconv" "strconv"
@ -20,8 +19,6 @@ import (
"modernc.org/libc" "modernc.org/libc"
) )
const Unset = math.MaxUint
// Version returns the version of pcre2 embedded in this library. // Version returns the version of pcre2 embedded in this library.
func Version() string { return lib.DPACKAGE_VERSION } func Version() string { return lib.DPACKAGE_VERSION }
@ -211,11 +208,7 @@ func (r *Regexp) FindSubmatch(b []byte) [][]byte {
out := make([][]byte, 0, len(match)/2) out := make([][]byte, 0, len(match)/2)
for i := 0; i < len(match); i += 2 { for i := 0; i < len(match); i += 2 {
if match[i] == Unset { out = append(out, b[match[i]:match[i+1]])
out = append(out, nil)
} else {
out = append(out, b[match[i]:match[i+1]])
}
} }
return out return out
} }
@ -260,11 +253,7 @@ func (r *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
outMatch := make([][]byte, 0, len(match)/2) outMatch := make([][]byte, 0, len(match)/2)
for i := 0; i < len(match); i += 2 { for i := 0; i < len(match); i += 2 {
if match[i] == Unset { outMatch = append(outMatch, b[match[i]:match[i+1]])
outMatch = append(outMatch, nil)
} else {
outMatch = append(outMatch, b[match[i]:match[i+1]])
}
} }
out[index] = outMatch out[index] = outMatch
@ -602,8 +591,8 @@ func (r *Regexp) SetCallout(fn func(cb *CalloutBlock) int32) error {
calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length) calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length)
cb.CalloutString = string(calloutStrBytes) cb.CalloutString = string(calloutStrBytes)
ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), (ccb.Fcapture_top*2)-1) ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), 2+(ccb.Fcapture_top-1)*2)
if len(ovecSlice) > 2 { if len(ovecSlice) > 2 {
ovecSlice = ovecSlice[2:] ovecSlice = ovecSlice[2:]
for i := 0; i < len(ovecSlice); i += 2 { for i := 0; i < len(ovecSlice); i += 2 {

View File

@ -4,7 +4,6 @@ import (
"strings" "strings"
"sync" "sync"
"testing" "testing"
"reflect"
"go.elara.ws/pcre" "go.elara.ws/pcre"
) )
@ -290,11 +289,3 @@ func TestCallout(t *testing.T) {
t.Error("expected regular expression to match the string") t.Error("expected regular expression to match the string")
} }
} }
func TestVarnish(t *testing.T) {
regex := pcre.MustCompile(`varnish(?: \(Varnish\/([\d.]{1,250})\))?`)
matches := regex.FindStringSubmatch("1.1 varnish")
if !reflect.DeepEqual(matches, []string{"varnish", ""}) {
t.Errorf(`Expected ["varnish" ""], got %q`, matches)
}
}

View File

@ -6,35 +6,35 @@ type CompileOption uint32
// Compile option bits // Compile option bits
const ( const (
Anchored = CompileOption(lib.DPCRE2_ANCHORED) Anchored = CompileOption(lib.DPCRE2_ANCHORED)
AllowEmptyClass = CompileOption(lib.DPCRE2_ALLOW_EMPTY_CLASS) AllowEmptyClass = CompileOption(lib.DPCRE2_ALLOW_EMPTY_CLASS)
AltBsux = CompileOption(lib.DPCRE2_ALT_BSUX) AltBsux = CompileOption(lib.DPCRE2_ALT_BSUX)
AltCircumflex = CompileOption(lib.DPCRE2_ALT_CIRCUMFLEX) AltCircumflex = CompileOption(lib.DPCRE2_ALT_CIRCUMFLEX)
AltVerbnames = CompileOption(lib.DPCRE2_ALT_VERBNAMES) AltVerbnames = CompileOption(lib.DPCRE2_ALT_VERBNAMES)
AutoCallout = CompileOption(lib.DPCRE2_AUTO_CALLOUT) AutoCallout = CompileOption(lib.DPCRE2_AUTO_CALLOUT)
Caseless = CompileOption(lib.DPCRE2_CASELESS) Caseless = CompileOption(lib.DPCRE2_CASELESS)
DollarEndOnly = CompileOption(lib.DPCRE2_DOLLAR_ENDONLY) DollarEndOnly = CompileOption(lib.DPCRE2_DOLLAR_ENDONLY)
DotAll = CompileOption(lib.DPCRE2_DOTALL) DotAll = CompileOption(lib.DPCRE2_DOTALL)
DupNames = CompileOption(lib.DPCRE2_DUPNAMES) DupNames = CompileOption(lib.DPCRE2_DUPNAMES)
EndAnchored = CompileOption(lib.DPCRE2_ENDANCHORED) EndAnchored = CompileOption(lib.DPCRE2_ENDANCHORED)
Extended = CompileOption(lib.DPCRE2_EXTENDED) Extended = CompileOption(lib.DPCRE2_EXTENDED)
FirstLine = CompileOption(lib.DPCRE2_FIRSTLINE) FirstLine = CompileOption(lib.DPCRE2_FIRSTLINE)
Literal = CompileOption(lib.DPCRE2_LITERAL) Literal = CompileOption(lib.DPCRE2_LITERAL)
MatchInvalidUTF = CompileOption(lib.DPCRE2_MATCH_INVALID_UTF) MatchInvalidUTF = CompileOption(lib.DPCRE2_MATCH_INVALID_UTF)
MatchUnsetBackref = CompileOption(lib.DPCRE2_MATCH_UNSET_BACKREF) MactchUnsetBackref = CompileOption(lib.DPCRE2_MATCH_UNSET_BACKREF)
Multiline = CompileOption(lib.DPCRE2_MULTILINE) Multiline = CompileOption(lib.DPCRE2_MULTILINE)
NeverBackslashC = CompileOption(lib.DPCRE2_NEVER_BACKSLASH_C) NeverBackslashC = CompileOption(lib.DPCRE2_NEVER_BACKSLASH_C)
NeverUCP = CompileOption(lib.DPCRE2_NEVER_UCP) NeverUCP = CompileOption(lib.DPCRE2_NEVER_UCP)
NeverUTF = CompileOption(lib.DPCRE2_NEVER_UTF) NeverUTF = CompileOption(lib.DPCRE2_NEVER_UTF)
NoAutoCapture = CompileOption(lib.DPCRE2_NO_AUTO_CAPTURE) NoAutoCapture = CompileOption(lib.DPCRE2_NO_AUTO_CAPTURE)
NoAutoPossess = CompileOption(lib.DPCRE2_NO_AUTO_POSSESS) NoAutoPossess = CompileOption(lib.DPCRE2_NO_AUTO_POSSESS)
NoDotStarAnchor = CompileOption(lib.DPCRE2_NO_DOTSTAR_ANCHOR) NoDotStarAnchor = CompileOption(lib.DPCRE2_NO_DOTSTAR_ANCHOR)
NoStartOptimize = CompileOption(lib.DPCRE2_NO_START_OPTIMIZE) NoStartOptimize = CompileOption(lib.DPCRE2_NO_START_OPTIMIZE)
NoUTFCheck = CompileOption(lib.DPCRE2_NO_UTF_CHECK) NoUTFCheck = CompileOption(lib.DPCRE2_NO_UTF_CHECK)
UCP = CompileOption(lib.DPCRE2_UCP) UCP = CompileOption(lib.DPCRE2_UCP)
Ungreedy = CompileOption(lib.DPCRE2_UNGREEDY) Ungreedy = CompileOption(lib.DPCRE2_UNGREEDY)
UseOffsetLimit = CompileOption(lib.DPCRE2_USE_OFFSET_LIMIT) UseOffsetLimit = CompileOption(lib.DPCRE2_USE_OFFSET_LIMIT)
UTF = CompileOption(lib.DPCRE2_UTF) UTF = CompileOption(lib.DPCRE2_UTF)
) )
type CalloutFlags uint32 type CalloutFlags uint32