2022-05-18 23:28:40 +00:00
// Package pcre is a library that provides pcre2 regular expressions
// in pure Go, allowing for features such as cross-compiling.
//
// The lib directory contains source code automatically translated from
// pcre2's C source code for each supported architecture and/or OS.
// This package wraps the automatically-translated source to provide a
// safe interface as close to Go's regexp library as possible.
package pcre
import (
"os"
"runtime"
"strconv"
"sync"
"unsafe"
2023-04-21 03:02:33 +00:00
"go.elara.ws/pcre/lib"
2022-05-18 23:28:40 +00:00
"modernc.org/libc"
)
2022-05-19 00:51:31 +00:00
// Version reports the version string of the pcre2 sources that are
// embedded in this library.
func Version() string {
	return lib.DPACKAGE_VERSION
}
2022-05-18 23:28:40 +00:00
// Regexp represents a pcre2 regular expression
type Regexp struct {
mtx * sync . Mutex
expr string
re uintptr
2023-07-17 06:23:47 +00:00
mctx uintptr
2022-05-18 23:28:40 +00:00
tls * libc . TLS
2023-07-17 06:23:47 +00:00
calloutMtx * sync . Mutex
callout * func ( tls * libc . TLS , cbptr , data uintptr ) int32
2022-05-18 23:28:40 +00:00
}
// Compile compiles pattern by calling CompileOpts with no options set.
//
// Close() should be called on the returned expression
// once it is no longer needed.
func Compile(pattern string) (*Regexp, error) {
	const noOptions CompileOption = 0
	return CompileOpts(pattern, noOptions)
}
// CompileOpts compiles the provided pattern using the given options.
//
// Close() should be called on the returned expression
// once it is no longer needed.
func CompileOpts ( pattern string , options CompileOption ) ( * Regexp , error ) {
tls := libc . NewTLS ( )
// Get C string of pattern
cPattern , err := libc . CString ( pattern )
if err != nil {
return nil , err
}
// Free the string when done
defer libc . Xfree ( tls , cPattern )
// Allocate new error
cErr := allocError ( tls )
// Free error when done
defer libc . Xfree ( tls , cErr )
// Get error offsets
errPtr := addErrCodeOffset ( cErr )
errOffsetPtr := addErrOffsetOffset ( cErr )
// Convert pattern length to size_t type
cPatLen := lib . Tsize_t ( len ( pattern ) )
// Compile expression
r := lib . Xpcre2_compile_8 ( tls , cPattern , cPatLen , uint32 ( options ) , errPtr , errOffsetPtr , 0 )
if r == 0 {
return nil , ptrToError ( tls , cErr )
}
// Create regexp instance
regex := Regexp {
2023-07-17 06:23:47 +00:00
expr : pattern ,
mtx : & sync . Mutex { } ,
re : r ,
mctx : lib . Xpcre2_match_context_create_8 ( tls , 0 ) ,
tls : tls ,
calloutMtx : & sync . Mutex { } ,
2022-05-18 23:28:40 +00:00
}
// Make sure resources are freed if GC collects the
// regular expression.
runtime . SetFinalizer ( & regex , func ( r * Regexp ) error {
return r . Close ( )
} )
return & regex , nil
}
// MustCompile is like Compile, but panics with the compile error
// instead of returning it.
//
// Close() should be called on the returned expression
// once it is no longer needed.
func MustCompile(pattern string) *Regexp {
	re, err := Compile(pattern)
	if err == nil {
		return re
	}
	panic(err)
}
// MustCompileOpts is like CompileOpts, but panics with the compile
// error instead of returning it.
//
// Close() should be called on the returned expression
// once it is no longer needed.
func MustCompileOpts(pattern string, options CompileOption) *Regexp {
	re, err := CompileOpts(pattern, options)
	if err == nil {
		return re
	}
	panic(err)
}
// Find returns the part of b holding the leftmost match of the regular
// expression. A return value of nil indicates no match.
//
// Find panics if the underlying match reports an error.
func (r *Regexp) Find(b []byte) []byte {
	found, err := r.match(b, 0, false)
	switch {
	case err != nil:
		panic(err)
	case len(found) == 0:
		return nil
	}

	start, end := found[0][0], found[0][1]
	return b[start:end]
}
// FindIndex returns a two-element slice holding the start and end
// offsets of the leftmost match of the regular expression in b.
// A return value of nil indicates no match.
//
// FindIndex panics if the underlying match reports an error.
func (r *Regexp) FindIndex(b []byte) []int {
	found, err := r.match(b, 0, false)
	switch {
	case err != nil:
		panic(err)
	case len(found) == 0:
		return nil
	}

	start, end := found[0][0], found[0][1]
	return []int{int(start), int(end)}
}
// FindAll returns all matches of the regular expression.
// A return value of nil indicates no match.
func ( r * Regexp ) FindAll ( b [ ] byte , n int ) [ ] [ ] byte {
matches , err := r . match ( b , 0 , true )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 || n == 0 {
return nil
}
if n > 0 && len ( matches ) > n {
matches = matches [ : n ]
}
2022-05-20 10:09:55 +00:00
out := make ( [ ] [ ] byte , len ( matches ) )
for index , match := range matches {
out [ index ] = b [ match [ 0 ] : match [ 1 ] ]
2022-05-18 23:28:40 +00:00
}
return out
}
// FindAll returns indices of all matches of the
// regular expression. A return value of nil indicates
// no match.
func ( r * Regexp ) FindAllIndex ( b [ ] byte , n int ) [ ] [ ] int {
matches , err := r . match ( b , 0 , true )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 || n == 0 {
return nil
}
if n > 0 && len ( matches ) > n {
matches = matches [ : n ]
}
2022-05-20 10:09:55 +00:00
out := make ( [ ] [ ] int , len ( matches ) )
for index , match := range matches {
out [ index ] = [ ] int { int ( match [ 0 ] ) , int ( match [ 1 ] ) }
2022-05-18 23:28:40 +00:00
}
return out
}
// FindSubmatch returns a slice containing the match as the
// first element, and the submatches as the subsequent elements.
func ( r * Regexp ) FindSubmatch ( b [ ] byte ) [ ] [ ] byte {
matches , err := r . match ( b , 0 , false )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 {
return nil
}
match := matches [ 0 ]
2022-05-20 10:09:55 +00:00
out := make ( [ ] [ ] byte , 0 , len ( match ) / 2 )
2022-05-18 23:28:40 +00:00
for i := 0 ; i < len ( match ) ; i += 2 {
2022-05-20 10:09:55 +00:00
out = append ( out , b [ match [ i ] : match [ i + 1 ] ] )
2022-05-18 23:28:40 +00:00
}
return out
}
// FindSubmatchIndex returns the offsets of the leftmost match and of its
// submatches, if any, as a flat slice of start/end pairs.
// A return value of nil indicates no match.
//
// FindSubmatchIndex panics if the underlying match reports an error.
func (r *Regexp) FindSubmatchIndex(b []byte) []int {
	found, err := r.match(b, 0, false)
	switch {
	case err != nil:
		panic(err)
	case len(found) == 0:
		return nil
	}

	offsets := found[0]
	out := make([]int, 0, len(offsets))
	for _, off := range offsets {
		out = append(out, int(off))
	}
	return out
}
// FindAllSubmatch returns a slice of all matches and submatches
// of the regular expression. It will return no more than n matches.
// If n < 0, it will return all matches.
func ( r * Regexp ) FindAllSubmatch ( b [ ] byte , n int ) [ ] [ ] [ ] byte {
matches , err := r . match ( b , 0 , true )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 || n == 0 {
return nil
}
if n > 0 && len ( matches ) > n {
matches = matches [ : n ]
}
out := make ( [ ] [ ] [ ] byte , len ( matches ) )
for index , match := range matches {
2022-05-20 10:09:55 +00:00
outMatch := make ( [ ] [ ] byte , 0 , len ( match ) / 2 )
2022-05-18 23:28:40 +00:00
for i := 0 ; i < len ( match ) ; i += 2 {
outMatch = append ( outMatch , b [ match [ i ] : match [ i + 1 ] ] )
}
out [ index ] = outMatch
}
return out
}
// FindAllSubmatchIndex returns, for every match of the regular
// expression, a flat slice of start/end offset pairs locating the match
// and its submatches, if any. It will return no more than n matches.
// If n < 0, it will return all matches. A return value of nil indicates
// no match, or that n is 0.
//
// FindAllSubmatchIndex panics if the underlying match reports an error.
func (r *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
	found, err := r.match(b, 0, true)
	if err != nil {
		panic(err)
	}

	if n == 0 || len(found) == 0 {
		return nil
	}
	if n > 0 && n < len(found) {
		found = found[:n]
	}

	out := make([][]int, 0, len(found))
	for _, offsets := range found {
		converted := make([]int, 0, len(offsets))
		for _, off := range offsets {
			converted = append(converted, int(off))
		}
		out = append(out, converted)
	}
	return out
}
// FindString is the string version of Find. It returns an empty string
// when Find returns nil.
func (r *Regexp) FindString(s string) string {
	found := r.Find([]byte(s))
	return string(found)
}
// FindStringIndex is the string version of FindIndex.
func (r *Regexp) FindStringIndex(s string) []int {
	subject := []byte(s)
	return r.FindIndex(subject)
}
// FinAllString is the String version of FindAll
func ( r * Regexp ) FindAllString ( s string , n int ) [ ] string {
matches := r . FindAll ( [ ] byte ( s ) , n )
2023-07-17 06:23:47 +00:00
2022-05-18 23:28:40 +00:00
out := make ( [ ] string , len ( matches ) )
for index , match := range matches {
out [ index ] = string ( match )
}
return out
}
// FindAllStringIndex is the string version of FindAllIndex.
func (r *Regexp) FindAllStringIndex(s string, n int) [][]int {
	subject := []byte(s)
	return r.FindAllIndex(subject, n)
}
// FindStringSubmatch is the string version of FindSubmatch. The returned
// slice is empty, but not nil, when FindSubmatch returns nil.
func (r *Regexp) FindStringSubmatch(s string) []string {
	groups := r.FindSubmatch([]byte(s))

	out := make([]string, 0, len(groups))
	for _, g := range groups {
		out = append(out, string(g))
	}
	return out
}
// FindStringSubmatchIndex is the string version of FindSubmatchIndex.
func (r *Regexp) FindStringSubmatchIndex(s string) []int {
	subject := []byte(s)
	return r.FindSubmatchIndex(subject)
}
// FindAllStringSubmatch is the string version of FindAllSubmatch. The
// returned slice is empty, but not nil, when FindAllSubmatch returns nil.
func (r *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
	found := r.FindAllSubmatch([]byte(s), n)

	out := make([][]string, 0, len(found))
	for _, groups := range found {
		converted := make([]string, 0, len(groups))
		for _, g := range groups {
			converted = append(converted, string(g))
		}
		out = append(out, converted)
	}
	return out
}
// FindAllStringSubmatchIndex is the string version of FindAllSubmatchIndex.
func (r *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
	subject := []byte(s)
	return r.FindAllSubmatchIndex(subject, n)
}
// Match reports whether Find returns a match of the regular expression
// in b.
func (r *Regexp) Match(b []byte) bool {
	found := r.Find(b)
	return found != nil
}
// MatchString is the string version of Match.
func (r *Regexp) MatchString(s string) bool {
	found := r.Find([]byte(s))
	return found != nil
}
// NumSubexp returns the number of parenthesized subexpressions in the
// regular expression, as reported by pcre2's capture count.
func (r *Regexp) NumSubexp() int {
	count := r.patternInfo(lib.DPCRE2_INFO_CAPTURECOUNT)
	return int(count)
}
// ReplaceAll returns a copy of src, replacing matches of the
// regular expression with the replacement text repl.
// Inside repl, $ signs are interpreted as in Expand,
// so for instance $1 represents the text of the first
2022-05-21 18:45:30 +00:00
// submatch and $name would represent the text of the
// subexpression called "name".
2022-05-18 23:28:40 +00:00
func ( r * Regexp ) ReplaceAll ( src , repl [ ] byte ) [ ] byte {
matches , err := r . match ( src , 0 , true )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 {
2022-05-21 18:56:35 +00:00
return src
2022-05-18 23:28:40 +00:00
}
out := make ( [ ] byte , len ( src ) )
copy ( out , src )
var diff int64
for _ , match := range matches {
replStr := os . Expand ( string ( repl ) , func ( s string ) string {
i , err := strconv . Atoi ( s )
if err != nil {
2022-05-21 18:45:30 +00:00
i = r . SubexpIndex ( s )
if i == - 1 {
return ""
}
2022-05-18 23:28:40 +00:00
}
// If there given match does not exist, return empty string
if i == 0 || len ( match ) < ( 2 * i ) + 1 {
return ""
}
// Return match
return string ( src [ match [ 2 * i ] : match [ ( 2 * i ) + 1 ] ] )
} )
// Replace replacement string with expanded string
repl := [ ] byte ( replStr )
// Replace bytes with new replacement string
diff , out = replaceBytes ( out , repl , match [ 0 ] , match [ 1 ] , diff )
}
return out
}
// ReplaceAllFunc returns a copy of src in which all matches of the
// regular expression have been replaced by the return value of function
// repl applied to the matched byte slice. The replacement returned by
// repl is substituted directly, without using Expand.
func ( r * Regexp ) ReplaceAllFunc ( src [ ] byte , repl func ( [ ] byte ) [ ] byte ) [ ] byte {
matches , err := r . match ( src , 0 , true )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 {
2022-05-21 18:56:35 +00:00
return src
2022-05-18 23:28:40 +00:00
}
out := make ( [ ] byte , len ( src ) )
copy ( out , src )
var diff int64
for _ , match := range matches {
replBytes := repl ( src [ match [ 0 ] : match [ 1 ] ] )
diff , out = replaceBytes ( out , replBytes , match [ 0 ] , match [ 1 ] , diff )
}
return out
}
// ReplaceAllLiteral returns a copy of src, replacing matches of
// the regular expression with the replacement bytes repl.
// The replacement is substituted directly, without using Expand.
func ( r * Regexp ) ReplaceAllLiteral ( src , repl [ ] byte ) [ ] byte {
matches , err := r . match ( src , 0 , true )
if err != nil {
panic ( err )
}
if len ( matches ) == 0 {
2022-05-21 18:56:35 +00:00
return src
2022-05-18 23:28:40 +00:00
}
out := make ( [ ] byte , len ( src ) )
copy ( out , src )
var diff int64
for _ , match := range matches {
diff , out = replaceBytes ( out , repl , match [ 0 ] , match [ 1 ] , diff )
}
return out
}
// ReplaceAllString is the string version of ReplaceAll.
func (r *Regexp) ReplaceAllString(src, repl string) string {
	replaced := r.ReplaceAll([]byte(src), []byte(repl))
	return string(replaced)
}
// ReplaceAllStringFunc is the string version of ReplaceAllFunc.
func (r *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
	// Adapt the string callback to the byte-slice callback that
	// ReplaceAllFunc expects.
	adapter := func(b []byte) []byte {
		return []byte(repl(string(b)))
	}

	replaced := r.ReplaceAllFunc([]byte(src), adapter)
	return string(replaced)
}
// ReplaceAllLiteralString is the string version of ReplaceAllLiteral.
func (r *Regexp) ReplaceAllLiteralString(src, repl string) string {
	replaced := r.ReplaceAllLiteral([]byte(src), []byte(repl))
	return string(replaced)
}
// Split slices s into substrings separated by the
// expression and returns a slice of the substrings
// between those expression matches.
//
// Example:
2023-07-17 06:23:47 +00:00
//
2022-05-18 23:28:40 +00:00
// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
// // s: ["", "b", "b", "c", "cadaaae"]
2023-07-17 06:23:47 +00:00
//
2022-05-18 23:28:40 +00:00
// The count determines the number of substrings to return:
2023-07-17 06:23:47 +00:00
//
2022-05-18 23:28:40 +00:00
// n > 0: at most n substrings; the last substring will be the unsplit remainder.
// n == 0: the result is nil (zero substrings)
// n < 0: all substrings
func ( r * Regexp ) Split ( s string , n int ) [ ] string {
if n == 0 {
return nil
}
if len ( r . expr ) > 0 && len ( s ) == 0 {
return [ ] string { "" }
}
matches := r . FindAllStringIndex ( s , n )
strings := make ( [ ] string , 0 , len ( matches ) )
beg := 0
end := 0
for _ , match := range matches {
if n > 0 && len ( strings ) >= n - 1 {
break
}
end = match [ 0 ]
if match [ 1 ] != 0 {
strings = append ( strings , s [ beg : end ] )
}
beg = match [ 1 ]
}
if end != len ( s ) {
strings = append ( strings , s [ beg : ] )
}
return strings
}
// String returns the pattern text that the regular expression was
// compiled from.
func (r *Regexp) String() string {
	pattern := r.expr
	return pattern
}
// SubexpIndex returns the index of the subexpression
// with the given name, or -1 if there is no subexpression
// with that name.
//
// SubexpIndex panics if the name cannot be converted to a C string or
// if pcre2 reports any error other than a missing substring.
func (r *Regexp) SubexpIndex(name string) int {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	// Get C string of name
	cName, err := libc.CString(name)
	if err != nil {
		panic(err)
	}
	// The C copy is only needed for the lookup below; free it on every
	// path out of the function, including the panic.
	defer libc.Xfree(r.tls, cName)

	// Get substring index from name
	ret := lib.Xpcre2_substring_number_from_name_8(r.tls, r.re, cName)

	switch {
	case ret == lib.DPCRE2_ERROR_NOSUBSTRING:
		// No subexpression has this name
		return -1
	case ret < 0:
		// Any other error is unexpected
		panic(codeToError(r.tls, ret))
	}

	// Return the index of the subexpression
	return int(ret)
}
2023-07-17 06:23:47 +00:00
// CalloutFlags is the type of the CalloutFlags field of a CalloutBlock.
// Its values are the flag bits declared below.
type CalloutFlags uint32
const (
	// CalloutStartMatch is pcre2's PCRE2_CALLOUT_STARTMATCH flag.
	CalloutStartMatch = CalloutFlags ( lib . DPCRE2_CALLOUT_STARTMATCH )
	// CalloutBacktrack is pcre2's PCRE2_CALLOUT_BACKTRACK flag.
	CalloutBacktrack = CalloutFlags ( lib . DPCRE2_CALLOUT_BACKTRACK )
)
// CalloutBlock is the Go view of a pcre2 callout block. A pointer to one
// is passed to the function installed with SetCallout each time a callout
// is invoked.
type CalloutBlock struct {
	// Version contains the version number of the block format.
	// The current version is 2.
	Version uint32
	// CalloutNumber contains the number of the callout, in the range 0-255.
	// This is the number that follows "?C". For callouts with string arguments,
	// this will always be zero.
	CalloutNumber uint32
	// CaptureTop contains the number of the highest numbered substring
	// captured so far plus one. If no substrings have yet been captured,
	// CaptureTop will be set to 1.
	CaptureTop uint32
	// CaptureLast contains the number of the last substring that was captured.
	CaptureLast uint32
	// Substrings contains all of the substrings captured so far.
	Substrings [ ] string
	// Mark contains the callout block's mark field converted to a Go string.
	// NOTE(review): presumably the name of the most recently passed (*MARK)
	// item; confirm against the pcre2callout documentation.
	Mark string
	// Subject contains the string passed to the match function.
	Subject string
	// StartMatch contains the offset within the subject at which the current match attempt started.
	StartMatch uint
	// CurrentPosition contains the offset of the current match pointer within the subject.
	CurrentPosition uint
	// PatternPosition contains the offset within the pattern string to the next item to be matched.
	PatternPosition uint
	// NextItemLength contains the length of the next item to be processed in the pattern string.
	NextItemLength uint
	// CalloutStringOffset contains the code unit offset to the start of the callout argument string within the original pattern string.
	CalloutStringOffset uint
	// CalloutString is the string for the callout. For numerical callouts, this will always be empty.
	CalloutString string
	// CalloutFlags contains the following flags:
	//
	//   - CalloutStartMatch is set for the first callout after the start of
	//     matching for each new starting position in the subject.
	//   - CalloutBacktrack is set if there has been a matching backtrack since
	//     the previous callout, or since the start of matching if this is the
	//     first callout from a pcre2_match() run.
	//
	// Both bits are set when a backtrack has caused a "bumpalong" to a new
	// starting position in the subject.
	CalloutFlags CalloutFlags
}
2023-07-17 06:27:57 +00:00
// SetCallout sets a callout function that will be called at specified points in the matching operation.
// fn should return zero if it ran successfully or a non-zero integer to force an error.
// See https://www.pcre.org/current/doc/html/pcre2callout.html for more information.
2023-07-17 06:23:47 +00:00
func ( r * Regexp ) SetCallout ( fn func ( cb * CalloutBlock ) int32 ) error {
cfn := func ( tls * libc . TLS , cbptr , data uintptr ) int32 {
ccb := ( * lib . Tpcre2_callout_block_8 ) ( unsafe . Pointer ( cbptr ) )
cb := & CalloutBlock {
Version : ccb . Fversion ,
CalloutNumber : ccb . Fcallout_number ,
CaptureTop : ccb . Fcapture_top ,
CaptureLast : ccb . Fcapture_last ,
Mark : libc . GoString ( ccb . Fmark ) ,
StartMatch : uint ( ccb . Fstart_match ) ,
CurrentPosition : uint ( ccb . Fcurrent_position ) ,
PatternPosition : uint ( ccb . Fpattern_position ) ,
NextItemLength : uint ( ccb . Fnext_item_length ) ,
CalloutStringOffset : uint ( ccb . Fcallout_string_offset ) ,
CalloutFlags : CalloutFlags ( ccb . Fcallout_flags ) ,
}
subjectBytes := unsafe . Slice ( ( * byte ) ( unsafe . Pointer ( ccb . Fsubject ) ) , ccb . Fsubject_length )
cb . Subject = string ( subjectBytes )
calloutStrBytes := unsafe . Slice ( ( * byte ) ( unsafe . Pointer ( ccb . Fcallout_string ) ) , ccb . Fcallout_string_length )
cb . CalloutString = string ( calloutStrBytes )
ovecSlice := unsafe . Slice ( ( * lib . Tsize_t ) ( unsafe . Pointer ( ccb . Foffset_vector ) ) , ( ccb . Fcapture_top * 2 ) - 1 ) [ 2 : ]
for i := 0 ; i < len ( ovecSlice ) ; i += 2 {
if i + 1 >= len ( ovecSlice ) {
cb . Substrings = append ( cb . Substrings , cb . Subject [ ovecSlice [ i ] : ] )
} else {
cb . Substrings = append ( cb . Substrings , cb . Subject [ ovecSlice [ i ] : ovecSlice [ i + 1 ] ] )
}
}
2023-07-17 06:33:46 +00:00
return fn ( cb )
2023-07-17 06:23:47 +00:00
}
r . calloutMtx . Lock ( )
defer r . calloutMtx . Unlock ( )
2023-07-17 06:32:06 +00:00
// Prevent callout function from being GC'd
2023-07-17 06:23:47 +00:00
r . callout = & cfn
ret := lib . Xpcre2_set_callout_8 ( r . tls , r . mctx , * ( * uintptr ) ( unsafe . Pointer ( & cfn ) ) , 0 )
if ret < 0 {
return codeToError ( r . tls , ret )
}
return nil
}
2022-05-18 23:28:40 +00:00
// replaceBytes replaces the bytes of src between sOff and eOff, both
// shifted by diff, with repl. It returns the updated diff, which grows or
// shrinks by the change in length, together with the new slice.
//
// The result is always built in freshly allocated memory: neither src nor
// repl is written to, even when they have spare capacity.
func replaceBytes(src, repl []byte, sOff, eOff lib.Tsize_t, diff int64) (int64, []byte) {
	// Translate the offsets, which refer to the original subject, into
	// offsets in src as it looks after the previous replacements.
	start := int64(sOff) + diff
	end := int64(eOff) + diff

	head, tail := src[:start], src[end:]

	out := make([]byte, 0, len(head)+len(repl)+len(tail))
	out = append(out, head...)
	out = append(out, repl...)
	out = append(out, tail...)

	return diff + int64(len(out)-len(src)), out
}
// match calls the underlying pcre match functions. It re-runs the functions
// until no matches are found if multi is set to true.
func ( r * Regexp ) match ( b [ ] byte , options uint32 , multi bool ) ( [ ] [ ] lib . Tsize_t , error ) {
2022-05-23 09:40:16 +00:00
if len ( b ) == 0 {
return nil , nil
}
2023-07-17 06:23:47 +00:00
2022-05-18 23:28:40 +00:00
r . mtx . Lock ( )
defer r . mtx . Unlock ( )
// Create a C pointer to the subject
sp := unsafe . Pointer ( & b [ 0 ] )
cSubject := uintptr ( sp )
// Convert the size of the subject to a C size_t type
cSubjectLen := lib . Tsize_t ( len ( b ) )
// Create match data using the pattern to figure out the buffer size
md := lib . Xpcre2_match_data_create_from_pattern_8 ( r . tls , r . re , 0 )
if md == 0 {
panic ( "error creating match data" )
}
// Free the match data at the end of the function
defer lib . Xpcre2_match_data_free_8 ( r . tls , md )
var offset lib . Tsize_t
var out [ ] [ ] lib . Tsize_t
// While the offset is less than the length of the subject
for offset < cSubjectLen {
// Execute expression on subject
2023-07-17 06:23:47 +00:00
ret := lib . Xpcre2_match_8 ( r . tls , r . re , cSubject , cSubjectLen , offset , options , md , r . mctx )
2022-05-18 23:28:40 +00:00
if ret < 0 {
// If no match found, break
if ret == lib . DPCRE2_ERROR_NOMATCH {
break
}
return nil , codeToError ( r . tls , ret )
} else {
// Get amount of pairs in output vector
pairAmt := lib . Xpcre2_get_ovector_count_8 ( r . tls , md )
// Get pointer to output vector
ovec := lib . Xpcre2_get_ovector_pointer_8 ( r . tls , md )
// Create a Go slice using the output vector as the underlying array
slice := unsafe . Slice ( ( * lib . Tsize_t ) ( unsafe . Pointer ( ovec ) ) , pairAmt * 2 )
// Create a new slice and copy the elements from the slice
// This is required because the match data will be freed in
// a defer, and that would cause a panic every time the slice
// is used later.
matches := make ( [ ] lib . Tsize_t , len ( slice ) )
copy ( matches , slice )
// If the two indices are the same (empty string), and the match is not
// immediately after another match, add it to the output and increment the
// offset. Otherwise, increment the offset and ignore the match.
if slice [ 0 ] == slice [ 1 ] && len ( out ) > 0 && slice [ 0 ] != out [ len ( out ) - 1 ] [ 1 ] {
out = append ( out , matches )
offset = slice [ 1 ] + 1
continue
} else if slice [ 0 ] == slice [ 1 ] {
offset = slice [ 1 ] + 1
continue
}
// Add the match to the output
out = append ( out , matches )
// Set the next offset to the end index of the match
offset = matches [ 1 ]
}
// If multiple matches disabled, break
if ! multi {
break
}
}
return out , nil
}
// patternInfo asks the underlying pcre2 pattern info function for the
// item selected by what and returns the 32-bit value it stored. The
// function's own return code is not checked.
func (r *Regexp) patternInfo(what uint32) (out uint32) {
	// pcre2 writes the requested value through this pointer
	dst := uintptr(unsafe.Pointer(&out))

	lib.Xpcre2_pattern_info_8(r.tls, r.re, what, dst)
	return out
}
// Close frees resources used by the regular expression.
func ( r * Regexp ) Close ( ) error {
if r == nil {
return nil
}
// Close thread-local storage
defer r . tls . Close ( )
// Free the compiled code
lib . Xpcre2_code_free_8 ( r . tls , r . re )
2023-07-17 06:23:47 +00:00
// Free the match context
lib . Xpcre2_match_context_free_8 ( r . tls , r . mctx )
2022-05-18 23:28:40 +00:00
// Set regular expression to null
r . re = 0
return nil
}