Compare commits
	
		
			11 Commits
		
	
	
		
			af322c4a52
			...
			master
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 4ce849193f | |||
| 107b5db1fb | |||
| e90cc6feac | |||
| 18912728e9 | |||
| 0a0008aef4 | |||
| 6d906d55a6 | |||
| d1b9df80a1 | |||
| bde850752d | |||
| bafc40da8a | |||
| c6fb3e8489 | |||
| 23df260dfd | 
							
								
								
									
										13
									
								
								.woodpecker.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								.woodpecker.yml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | ||||
| matrix: | ||||
|   platform: | ||||
|     - linux/amd64 | ||||
|     - linux/arm64 | ||||
|     - linux/riscv64 | ||||
|  | ||||
| steps: | ||||
|   test: | ||||
|     image: gitea.elara.ws/elara6331/golang:latest | ||||
|     commands: | ||||
|       - go test | ||||
|     when: | ||||
|       - event: push | ||||
| @@ -1,6 +1,7 @@ | ||||
| # pcre | ||||
|  | ||||
| [](https://pkg.go.dev/go.arsenm.dev/pcre) | ||||
| [](https://pkg.go.dev/go.elara.ws/pcre) | ||||
| [](https://ci.elara.ws/49) | ||||
|  | ||||
| This package provides a CGo-free port of the PCRE2 regular expression library. The [lib](lib) directory contains source code automatically translated from PCRE2's C source. This package wraps that code and provides an interface as close as possible to Go's stdlib [regexp](https://pkg.go.dev/regexp) package. | ||||
|  | ||||
| @@ -50,4 +51,4 @@ CC=/usr/bin/gcc ccgo -o pcre2_<os>_<arch>.go -pkgname lib -trace-translation-uni | ||||
|  | ||||
| - If cross-compiling, set the `CCGO_CC` variable to the path of the cross-compiler, and the `CCGO_AR` variable to the path of the cross-compiler's `ar` binary. Also, set `TARGET_GOARCH` to the GOARCH you're targeting and `TARGET_GOOS` to the OS you're targeting. | ||||
|  | ||||
| - Once the command completes, two go files will be created. One will start with `pcre2`, the other with `capi`. Copy both of these to the `lib` directory in this repo. | ||||
| - Once the command completes, two go files will be created. One will start with `pcre2`, the other with `capi`. Copy both of these to the `lib` directory in this repo. | ||||
|   | ||||
							
								
								
									
										2
									
								
								glob.go
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								glob.go
									
									
									
									
									
								
							| @@ -108,7 +108,7 @@ func Glob(glob string) ([]string, error) { | ||||
|  | ||||
| 	// Join splitDir and add filepath separator. This is the directory that will be searched. | ||||
| 	dir := filepath.Join(splitDir...) | ||||
| 	 | ||||
|  | ||||
| 	if filepath.IsAbs(glob) { | ||||
| 		dir = string(filepath.Separator) + dir | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										12
									
								
								glob_test.go
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								glob_test.go
									
									
									
									
									
								
							| @@ -39,17 +39,17 @@ func TestCompileGlob(t *testing.T) { | ||||
| } | ||||
|  | ||||
| func TestGlob(t *testing.T) { | ||||
| 	err := os.MkdirAll("pcretest/dir1", 0755) | ||||
| 	err := os.MkdirAll("pcretest/dir1", 0o755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
|  | ||||
| 	err = os.MkdirAll("pcretest/dir2", 0755) | ||||
| 	err = os.MkdirAll("pcretest/dir2", 0o755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
|  | ||||
| 	err = os.MkdirAll("pcretest/test1/dir4", 0755) | ||||
| 	err = os.MkdirAll("pcretest/test1/dir4", 0o755) | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| @@ -58,7 +58,7 @@ func TestGlob(t *testing.T) { | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| 	} | ||||
| 	 | ||||
|  | ||||
| 	err = touch("pcretest/file2") | ||||
| 	if err != nil { | ||||
| 		t.Fatal(err) | ||||
| @@ -113,9 +113,9 @@ func TestGlob(t *testing.T) { | ||||
| } | ||||
|  | ||||
| func touch(path string) error { | ||||
| 	fl, err := os.OpenFile(path, os.O_CREATE, 0644) | ||||
| 	fl, err := os.OpenFile(path, os.O_CREATE, 0o644) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return fl.Close() | ||||
| } | ||||
| } | ||||
|   | ||||
							
								
								
									
										91
									
								
								pcre.go
									
									
									
									
									
								
							
							
						
						
									
										91
									
								
								pcre.go
									
									
									
									
									
								
							| @@ -8,6 +8,7 @@ | ||||
| package pcre | ||||
|  | ||||
| import ( | ||||
| 	"math" | ||||
| 	"os" | ||||
| 	"runtime" | ||||
| 	"strconv" | ||||
| @@ -19,6 +20,8 @@ import ( | ||||
| 	"modernc.org/libc" | ||||
| ) | ||||
|  | ||||
| const Unset = math.MaxUint | ||||
|  | ||||
| // Version returns the version of pcre2 embedded in this library. | ||||
| func Version() string { return lib.DPACKAGE_VERSION } | ||||
|  | ||||
| @@ -208,7 +211,11 @@ func (r *Regexp) FindSubmatch(b []byte) [][]byte { | ||||
|  | ||||
| 	out := make([][]byte, 0, len(match)/2) | ||||
| 	for i := 0; i < len(match); i += 2 { | ||||
| 		out = append(out, b[match[i]:match[i+1]]) | ||||
| 		if match[i] == Unset { | ||||
| 			out = append(out, nil) | ||||
| 		} else { | ||||
| 			out = append(out, b[match[i]:match[i+1]]) | ||||
| 		} | ||||
| 	} | ||||
| 	return out | ||||
| } | ||||
| @@ -253,7 +260,11 @@ func (r *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { | ||||
| 		outMatch := make([][]byte, 0, len(match)/2) | ||||
|  | ||||
| 		for i := 0; i < len(match); i += 2 { | ||||
| 			outMatch = append(outMatch, b[match[i]:match[i+1]]) | ||||
| 			if match[i] == Unset { | ||||
| 				outMatch = append(outMatch, nil) | ||||
| 			} else { | ||||
| 				outMatch = append(outMatch, b[match[i]:match[i+1]]) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		out[index] = outMatch | ||||
| @@ -565,67 +576,6 @@ func (r *Regexp) SubexpIndex(name string) int { | ||||
| 	return int(ret) | ||||
| } | ||||
|  | ||||
| type CalloutFlags uint32 | ||||
|  | ||||
| const ( | ||||
| 	CalloutStartMatch = CalloutFlags(lib.DPCRE2_CALLOUT_STARTMATCH) | ||||
| 	CalloutBacktrack  = CalloutFlags(lib.DPCRE2_CALLOUT_BACKTRACK) | ||||
| ) | ||||
|  | ||||
| type CalloutBlock struct { | ||||
| 	// Version contains the version number of the block format. | ||||
| 	// The current version is 2. | ||||
| 	Version uint32 | ||||
|  | ||||
| 	// CalloutNumber contains the number of the callout, in the range 0-255. | ||||
| 	// This is the number that follows "?C". For callouts with string arguments, | ||||
| 	// this will always be zero. | ||||
| 	CalloutNumber uint32 | ||||
|  | ||||
| 	// CaptureTop contains the number of the highest numbered substring | ||||
| 	// captured so far plus one. If no substrings have yet been captured, | ||||
| 	// CaptureTop will be set to 1. | ||||
| 	CaptureTop uint32 | ||||
|  | ||||
| 	// CaptureLast contains the number of the last substring that was captured. | ||||
| 	CaptureLast uint32 | ||||
|  | ||||
| 	// Substrings contains all of the substrings captured so far. | ||||
| 	Substrings []string | ||||
|  | ||||
| 	Mark string | ||||
|  | ||||
| 	// Subject contains the string passed to the match function. | ||||
| 	Subject string | ||||
|  | ||||
| 	// StartMatch contains the offset within the subject at which the current match attempt started. | ||||
| 	StartMatch uint | ||||
|  | ||||
| 	// CurrentPosition contains the offset of the current match pointer within the subject. | ||||
| 	CurrentPosition uint | ||||
|  | ||||
| 	// PatternPosition contains the offset within the pattern string to the next item to be matched. | ||||
| 	PatternPosition uint | ||||
|  | ||||
| 	// NextItemLength contains the length of the next item to be processed in the pattern string. | ||||
| 	NextItemLength uint | ||||
|  | ||||
| 	// CalloutStringOffset contains the code unit offset to the start of the callout argument string within the original pattern string. | ||||
| 	CalloutStringOffset uint | ||||
|  | ||||
| 	// CalloutString is the string for the callout. For numerical callouts, this will always be empty. | ||||
| 	CalloutString string | ||||
|  | ||||
| 	// CalloutFlags contains the following flags: | ||||
| 	// 	CalloutStartMatch | ||||
| 	// This is set for the first callout after the start of matching for each new starting position in the subject. | ||||
| 	// 	CalloutBacktrack | ||||
| 	// This is set if there has been a matching backtrack since the previous callout, or since the start of matching if this is the first callout from a pcre2_match() run. | ||||
| 	// | ||||
| 	// Both bits are set when a backtrack has caused a "bumpalong" to a new starting position in the subject. Output | ||||
| 	CalloutFlags CalloutFlags | ||||
| } | ||||
|  | ||||
| // SetCallout sets a callout function that will be called at specified points in the matching operation. | ||||
| // fn should return zero if it ran successfully or a non-zero integer to force an error. | ||||
| // See https://www.pcre.org/current/doc/html/pcre2callout.html for more information. | ||||
| @@ -653,12 +603,15 @@ func (r *Regexp) SetCallout(fn func(cb *CalloutBlock) int32) error { | ||||
| 		calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length) | ||||
| 		cb.CalloutString = string(calloutStrBytes) | ||||
|  | ||||
| 		ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), (ccb.Fcapture_top*2)-1)[2:] | ||||
| 		for i := 0; i < len(ovecSlice); i += 2 { | ||||
| 			if i+1 >= len(ovecSlice) { | ||||
| 				cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:]) | ||||
| 			} else { | ||||
| 				cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:ovecSlice[i+1]]) | ||||
| 		ovecSlice := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), (ccb.Fcapture_top*2)-1) | ||||
| 		if len(ovecSlice) > 2 { | ||||
| 			ovecSlice = ovecSlice[2:] | ||||
| 			for i := 0; i < len(ovecSlice); i += 2 { | ||||
| 				if i+1 >= len(ovecSlice) { | ||||
| 					cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:]) | ||||
| 				} else { | ||||
| 					cb.Substrings = append(cb.Substrings, cb.Subject[ovecSlice[i]:ovecSlice[i+1]]) | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import ( | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"testing" | ||||
| 	"reflect" | ||||
|  | ||||
| 	"go.elara.ws/pcre" | ||||
| ) | ||||
| @@ -289,3 +290,11 @@ func TestCallout(t *testing.T) { | ||||
| 		t.Error("expected regular expression to match the string") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestVarnish(t *testing.T) { | ||||
| 	regex := pcre.MustCompile(`varnish(?: \(Varnish\/([\d.]{1,250})\))?`) | ||||
| 	matches := regex.FindStringSubmatch("1.1 varnish") | ||||
| 	if !reflect.DeepEqual(matches, []string{"varnish", ""}) { | ||||
| 		t.Errorf(`Expected ["varnish" ""], got %q`, matches) | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										120
									
								
								types.go
									
									
									
									
									
								
							
							
						
						
									
										120
									
								
								types.go
									
									
									
									
									
								
							| @@ -6,33 +6,95 @@ type CompileOption uint32 | ||||
|  | ||||
| // Compile option bits | ||||
| const ( | ||||
| 	Anchored           = CompileOption(lib.DPCRE2_ANCHORED) | ||||
| 	AllowEmptyClass    = CompileOption(lib.DPCRE2_ALLOW_EMPTY_CLASS) | ||||
| 	AltBsux            = CompileOption(lib.DPCRE2_ALT_BSUX) | ||||
| 	AltCircumflex      = CompileOption(lib.DPCRE2_ALT_CIRCUMFLEX) | ||||
| 	AltVerbnames       = CompileOption(lib.DPCRE2_ALT_VERBNAMES) | ||||
| 	AutoCallout        = CompileOption(lib.DPCRE2_AUTO_CALLOUT) | ||||
| 	Caseless           = CompileOption(lib.DPCRE2_CASELESS) | ||||
| 	DollarEndOnly      = CompileOption(lib.DPCRE2_DOLLAR_ENDONLY) | ||||
| 	DotAll             = CompileOption(lib.DPCRE2_DOTALL) | ||||
| 	DupNames           = CompileOption(lib.DPCRE2_DUPNAMES) | ||||
| 	EndAnchored        = CompileOption(lib.DPCRE2_ENDANCHORED) | ||||
| 	Extended           = CompileOption(lib.DPCRE2_EXTENDED) | ||||
| 	FirstLine          = CompileOption(lib.DPCRE2_FIRSTLINE) | ||||
| 	Literal            = CompileOption(lib.DPCRE2_LITERAL) | ||||
| 	MatchInvalidUTF    = CompileOption(lib.DPCRE2_MATCH_INVALID_UTF) | ||||
| 	MactchUnsetBackref = CompileOption(lib.DPCRE2_MATCH_UNSET_BACKREF) | ||||
| 	Multiline          = CompileOption(lib.DPCRE2_MULTILINE) | ||||
| 	NeverBackslashC    = CompileOption(lib.DPCRE2_NEVER_BACKSLASH_C) | ||||
| 	NeverUCP           = CompileOption(lib.DPCRE2_NEVER_UCP) | ||||
| 	NeverUTF           = CompileOption(lib.DPCRE2_NEVER_UTF) | ||||
| 	NoAutoCapture      = CompileOption(lib.DPCRE2_NO_AUTO_CAPTURE) | ||||
| 	NoAutoPossess      = CompileOption(lib.DPCRE2_NO_AUTO_POSSESS) | ||||
| 	NoDotStarAnchor    = CompileOption(lib.DPCRE2_NO_DOTSTAR_ANCHOR) | ||||
| 	NoStartOptimize    = CompileOption(lib.DPCRE2_NO_START_OPTIMIZE) | ||||
| 	NoUTFCheck         = CompileOption(lib.DPCRE2_NO_UTF_CHECK) | ||||
| 	UCP                = CompileOption(lib.DPCRE2_UCP) | ||||
| 	Ungreedy           = CompileOption(lib.DPCRE2_UNGREEDY) | ||||
| 	UseOffsetLimit     = CompileOption(lib.DPCRE2_USE_OFFSET_LIMIT) | ||||
| 	UTF                = CompileOption(lib.DPCRE2_UTF) | ||||
| 	Anchored          = CompileOption(lib.DPCRE2_ANCHORED) | ||||
| 	AllowEmptyClass   = CompileOption(lib.DPCRE2_ALLOW_EMPTY_CLASS) | ||||
| 	AltBsux           = CompileOption(lib.DPCRE2_ALT_BSUX) | ||||
| 	AltCircumflex     = CompileOption(lib.DPCRE2_ALT_CIRCUMFLEX) | ||||
| 	AltVerbnames      = CompileOption(lib.DPCRE2_ALT_VERBNAMES) | ||||
| 	AutoCallout       = CompileOption(lib.DPCRE2_AUTO_CALLOUT) | ||||
| 	Caseless          = CompileOption(lib.DPCRE2_CASELESS) | ||||
| 	DollarEndOnly     = CompileOption(lib.DPCRE2_DOLLAR_ENDONLY) | ||||
| 	DotAll            = CompileOption(lib.DPCRE2_DOTALL) | ||||
| 	DupNames          = CompileOption(lib.DPCRE2_DUPNAMES) | ||||
| 	EndAnchored       = CompileOption(lib.DPCRE2_ENDANCHORED) | ||||
| 	Extended          = CompileOption(lib.DPCRE2_EXTENDED) | ||||
| 	FirstLine         = CompileOption(lib.DPCRE2_FIRSTLINE) | ||||
| 	Literal           = CompileOption(lib.DPCRE2_LITERAL) | ||||
| 	MatchInvalidUTF   = CompileOption(lib.DPCRE2_MATCH_INVALID_UTF) | ||||
| 	MatchUnsetBackref = CompileOption(lib.DPCRE2_MATCH_UNSET_BACKREF) | ||||
| 	Multiline         = CompileOption(lib.DPCRE2_MULTILINE) | ||||
| 	NeverBackslashC   = CompileOption(lib.DPCRE2_NEVER_BACKSLASH_C) | ||||
| 	NeverUCP          = CompileOption(lib.DPCRE2_NEVER_UCP) | ||||
| 	NeverUTF          = CompileOption(lib.DPCRE2_NEVER_UTF) | ||||
| 	NoAutoCapture     = CompileOption(lib.DPCRE2_NO_AUTO_CAPTURE) | ||||
| 	NoAutoPossess     = CompileOption(lib.DPCRE2_NO_AUTO_POSSESS) | ||||
| 	NoDotStarAnchor   = CompileOption(lib.DPCRE2_NO_DOTSTAR_ANCHOR) | ||||
| 	NoStartOptimize   = CompileOption(lib.DPCRE2_NO_START_OPTIMIZE) | ||||
| 	NoUTFCheck        = CompileOption(lib.DPCRE2_NO_UTF_CHECK) | ||||
| 	UCP               = CompileOption(lib.DPCRE2_UCP) | ||||
| 	Ungreedy          = CompileOption(lib.DPCRE2_UNGREEDY) | ||||
| 	UseOffsetLimit    = CompileOption(lib.DPCRE2_USE_OFFSET_LIMIT) | ||||
| 	UTF               = CompileOption(lib.DPCRE2_UTF) | ||||
| ) | ||||
|  | ||||
| type CalloutFlags uint32 | ||||
|  | ||||
| const ( | ||||
| 	CalloutStartMatch = CalloutFlags(lib.DPCRE2_CALLOUT_STARTMATCH) | ||||
| 	CalloutBacktrack  = CalloutFlags(lib.DPCRE2_CALLOUT_BACKTRACK) | ||||
| ) | ||||
|  | ||||
| // CalloutBlock contains the data passed to callout functions | ||||
| type CalloutBlock struct { | ||||
| 	// Version contains the version number of the block format. | ||||
| 	// The current version is 2. | ||||
| 	Version uint32 | ||||
|  | ||||
| 	// CalloutNumber contains the number of the callout, in the range 0-255. | ||||
| 	// This is the number that follows "?C". For callouts with string arguments, | ||||
| 	// this will always be zero. | ||||
| 	CalloutNumber uint32 | ||||
|  | ||||
| 	// CaptureTop contains the number of the highest numbered substring | ||||
| 	// captured so far plus one. If no substrings have yet been captured, | ||||
| 	// CaptureTop will be set to 1. | ||||
| 	CaptureTop uint32 | ||||
|  | ||||
| 	// CaptureLast contains the number of the last substring that was captured. | ||||
| 	CaptureLast uint32 | ||||
|  | ||||
| 	// Substrings contains all of the substrings captured so far. | ||||
| 	Substrings []string | ||||
|  | ||||
| 	Mark string | ||||
|  | ||||
| 	// Subject contains the string passed to the match function. | ||||
| 	Subject string | ||||
|  | ||||
| 	// StartMatch contains the offset within the subject at which the current match attempt started. | ||||
| 	StartMatch uint | ||||
|  | ||||
| 	// CurrentPosition contains the offset of the current match pointer within the subject. | ||||
| 	CurrentPosition uint | ||||
|  | ||||
| 	// PatternPosition contains the offset within the pattern string to the next item to be matched. | ||||
| 	PatternPosition uint | ||||
|  | ||||
| 	// NextItemLength contains the length of the next item to be processed in the pattern string. | ||||
| 	NextItemLength uint | ||||
|  | ||||
| 	// CalloutStringOffset contains the code unit offset to the start of the callout argument string within the original pattern string. | ||||
| 	CalloutStringOffset uint | ||||
|  | ||||
| 	// CalloutString is the string for the callout. For numerical callouts, this will always be empty. | ||||
| 	CalloutString string | ||||
|  | ||||
| 	// CalloutFlags contains the following flags: | ||||
| 	// 	CalloutStartMatch | ||||
| 	// This is set for the first callout after the start of matching for each new starting position in the subject. | ||||
| 	// 	CalloutBacktrack | ||||
| 	// This is set if there has been a matching backtrack since the previous callout, or since the start of matching if this is the first callout from a pcre2_match() run. | ||||
| 	// | ||||
| 	// Both bits are set when a backtrack has caused a "bumpalong" to a new starting position in the subject. | ||||
| 	CalloutFlags CalloutFlags | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user