forked from Elara6331/pcre
		
	Implement Callouts
This commit is contained in:
		
							
								
								
									
										135
									
								
								pcre.go
									
									
									
									
									
								
							
							
						
						
									
										135
									
								
								pcre.go
									
									
									
									
									
								
							| @@ -27,7 +27,11 @@ type Regexp struct { | ||||
| 	mtx  *sync.Mutex | ||||
| 	expr string | ||||
| 	re   uintptr | ||||
| 	mctx uintptr | ||||
| 	tls  *libc.TLS | ||||
|  | ||||
| 	calloutMtx *sync.Mutex | ||||
| 	callout    *func(tls *libc.TLS, cbptr, data uintptr) int32 | ||||
| } | ||||
|  | ||||
| // Compile runs CompileOpts with no options. | ||||
| @@ -73,10 +77,12 @@ func CompileOpts(pattern string, options CompileOption) (*Regexp, error) { | ||||
|  | ||||
| 	// Create regexp instance | ||||
| 	regex := Regexp{ | ||||
| 		expr: pattern, | ||||
| 		mtx:  &sync.Mutex{}, | ||||
| 		re:   r, | ||||
| 		tls:  tls, | ||||
| 		expr:       pattern, | ||||
| 		mtx:        &sync.Mutex{}, | ||||
| 		re:         r, | ||||
| 		mctx:       lib.Xpcre2_match_context_create_8(tls, 0), | ||||
| 		tls:        tls, | ||||
| 		calloutMtx: &sync.Mutex{}, | ||||
| 	} | ||||
|  | ||||
| 	// Make sure resources are freed if GC collects the | ||||
| @@ -298,7 +304,7 @@ func (r *Regexp) FindStringIndex(s string) []int { | ||||
| // FindAllString is the String version of FindAll | ||||
| func (r *Regexp) FindAllString(s string, n int) []string { | ||||
| 	matches := r.FindAll([]byte(s), n) | ||||
| 	 | ||||
|  | ||||
| 	out := make([]string, len(matches)) | ||||
| 	for index, match := range matches { | ||||
| 		out[index] = string(match) | ||||
| @@ -483,9 +489,12 @@ func (r *Regexp) ReplaceAllLiteralString(src, repl string) string { | ||||
| // between those expression matches. | ||||
| // | ||||
| // Example: | ||||
| // | ||||
| //	s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) | ||||
| //	// s: ["", "b", "b", "c", "cadaaae"] | ||||
| // | ||||
| // The count determines the number of substrings to return: | ||||
| // | ||||
| //	n > 0: at most n substrings; the last substring will be the unsplit remainder. | ||||
| //	n == 0: the result is nil (zero substrings) | ||||
| //	n < 0: all substrings | ||||
| @@ -556,6 +565,116 @@ func (r *Regexp) SubexpIndex(name string) int { | ||||
| 	return int(ret) | ||||
| } | ||||
|  | ||||
| // CalloutFlags is the type of the flag bits reported in | ||||
| // CalloutBlock.CalloutFlags. | ||||
| type CalloutFlags uint32 | ||||
|  | ||||
| // Flag bits that may be set in CalloutBlock.CalloutFlags. They mirror the | ||||
| // PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK constants of the | ||||
| // underlying library; their meaning is described on that field. | ||||
| const ( | ||||
| 	CalloutStartMatch = CalloutFlags(lib.DPCRE2_CALLOUT_STARTMATCH) | ||||
| 	CalloutBacktrack  = CalloutFlags(lib.DPCRE2_CALLOUT_BACKTRACK) | ||||
| ) | ||||
|  | ||||
| // CalloutBlock is the Go representation of the PCRE2 callout block. A pointer | ||||
| // to a freshly populated CalloutBlock is passed to the function registered | ||||
| // with SetCallout each time a callout point is reached. | ||||
| type CalloutBlock struct { | ||||
| 	// Version contains the version number of the block format. | ||||
| 	// The current version is 2. | ||||
| 	Version uint32 | ||||
|  | ||||
| 	// CalloutNumber contains the number of the callout, in the range 0-255. | ||||
| 	// This is the number that follows "?C". For callouts with string arguments, | ||||
| 	// this will always be zero. | ||||
| 	CalloutNumber uint32 | ||||
|  | ||||
| 	// CaptureTop contains the number of the highest numbered substring | ||||
| 	// captured so far plus one. If no substrings have yet been captured, | ||||
| 	// CaptureTop will be set to 1. | ||||
| 	CaptureTop uint32 | ||||
|  | ||||
| 	// CaptureLast contains the number of the last substring that was captured. | ||||
| 	CaptureLast uint32 | ||||
|  | ||||
| 	// Substrings contains all of the substrings captured so far. | ||||
| 	Substrings []string | ||||
|  | ||||
| 	// Mark is the Go string read from the mark pointer of the underlying | ||||
| 	// callout block. | ||||
| 	// NOTE(review): per the PCRE2 callout documentation this should be the | ||||
| 	// name of the most recently passed (*MARK), (*PRUNE) or (*THEN) item, | ||||
| 	// and empty if there is none - confirm. | ||||
| 	Mark string | ||||
|  | ||||
| 	// Subject contains the string passed to the match function. | ||||
| 	Subject string | ||||
|  | ||||
| 	// StartMatch contains the offset within the subject at which the current match attempt started. | ||||
| 	StartMatch uint | ||||
|  | ||||
| 	// CurrentPosition contains the offset of the current match pointer within the subject. | ||||
| 	CurrentPosition uint | ||||
|  | ||||
| 	// PatternPosition contains the offset within the pattern string to the next item to be matched. | ||||
| 	PatternPosition uint | ||||
|  | ||||
| 	// NextItemLength contains the length of the next item to be processed in the pattern string. | ||||
| 	NextItemLength uint | ||||
|  | ||||
| 	// CalloutStringOffset contains the code unit offset to the start of the callout argument string within the original pattern string. | ||||
| 	CalloutStringOffset uint | ||||
|  | ||||
| 	// CalloutString is the string for the callout. For numerical callouts, this will always be empty. | ||||
| 	CalloutString string | ||||
|  | ||||
| 	// CalloutFlags contains the following flags: | ||||
| 	// 	CalloutStartMatch | ||||
| 	// This is set for the first callout after the start of matching for each new starting position in the subject. | ||||
| 	// 	CalloutBacktrack | ||||
| 	// This is set if there has been a matching backtrack since the previous callout, or since the start of matching if this is the first callout from a pcre2_match() run. | ||||
| 	// | ||||
| 	// Both bits are set when a backtrack has caused a "bumpalong" to a new starting position in the subject. | ||||
| 	CalloutFlags CalloutFlags | ||||
| } | ||||
|  | ||||
| // SetCallout registers fn as the callout function of r's match context. | ||||
| // Every time matching reaches a callout point in the pattern, fn is called | ||||
| // with a CalloutBlock describing the current state of the match. | ||||
| // | ||||
| // The value returned by fn is handed back to PCRE2 unchanged. According to | ||||
| // the PCRE2 callout documentation, zero lets matching continue, a positive | ||||
| // value makes the match fail at the current point (other alternatives are | ||||
| // still tried), and a negative value abandons the match with that value as | ||||
| // the error code. | ||||
| // | ||||
| // Substrings in the block holds capture groups 1..CaptureTop-1 in order | ||||
| // (index 0 is group 1). Groups that are unset at the time of the callout are | ||||
| // reported as empty strings. | ||||
| // | ||||
| // SetCallout returns an error if PCRE2 refuses to install the callout. | ||||
| func (r *Regexp) SetCallout(fn func(cb *CalloutBlock) int32) error { | ||||
| 	cfn := func(tls *libc.TLS, cbptr, data uintptr) int32 { | ||||
| 		ccb := (*lib.Tpcre2_callout_block_8)(unsafe.Pointer(cbptr)) | ||||
|  | ||||
| 		cb := &CalloutBlock{ | ||||
| 			Version:             ccb.Fversion, | ||||
| 			CalloutNumber:       ccb.Fcallout_number, | ||||
| 			CaptureTop:          ccb.Fcapture_top, | ||||
| 			CaptureLast:         ccb.Fcapture_last, | ||||
| 			Mark:                libc.GoString(ccb.Fmark), | ||||
| 			StartMatch:          uint(ccb.Fstart_match), | ||||
| 			CurrentPosition:     uint(ccb.Fcurrent_position), | ||||
| 			PatternPosition:     uint(ccb.Fpattern_position), | ||||
| 			NextItemLength:      uint(ccb.Fnext_item_length), | ||||
| 			CalloutStringOffset: uint(ccb.Fcallout_string_offset), | ||||
| 			CalloutFlags:        CalloutFlags(ccb.Fcallout_flags), | ||||
| 		} | ||||
|  | ||||
| 		// Copy the C buffers into Go strings so that the block stays valid | ||||
| 		// after the callout returns. | ||||
| 		subjectBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fsubject)), ccb.Fsubject_length) | ||||
| 		cb.Subject = string(subjectBytes) | ||||
|  | ||||
| 		calloutStrBytes := unsafe.Slice((*byte)(unsafe.Pointer(ccb.Fcallout_string)), ccb.Fcallout_string_length) | ||||
| 		cb.CalloutString = string(calloutStrBytes) | ||||
|  | ||||
| 		// The offset vector holds capture_top (start, end) pairs. Pair 0 | ||||
| 		// describes the whole match, which is not complete during a | ||||
| 		// callout, so only pairs 1..capture_top-1 are converted. | ||||
| 		if ccb.Fcapture_top > 1 { | ||||
| 			ovec := unsafe.Slice((*lib.Tsize_t)(unsafe.Pointer(ccb.Foffset_vector)), ccb.Fcapture_top*2)[2:] | ||||
| 			subjectLen := lib.Tsize_t(len(cb.Subject)) | ||||
|  | ||||
| 			cb.Substrings = make([]string, 0, len(ovec)/2) | ||||
| 			for i := 0; i+1 < len(ovec); i += 2 { | ||||
| 				start, end := ovec[i], ovec[i+1] | ||||
| 				// Unset groups carry the maximum size_t value in both | ||||
| 				// offsets; report them as empty instead of slicing out | ||||
| 				// of range. | ||||
| 				if start > end || end > subjectLen { | ||||
| 					cb.Substrings = append(cb.Substrings, "") | ||||
| 					continue | ||||
| 				} | ||||
| 				cb.Substrings = append(cb.Substrings, cb.Subject[start:end]) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		return fn(cb) | ||||
| 	} | ||||
|  | ||||
| 	// Prevent callout functions from being GC'd | ||||
| 	r.calloutMtx.Lock() | ||||
| 	defer r.calloutMtx.Unlock() | ||||
| 	r.callout = &cfn | ||||
|  | ||||
| 	// The transpiled library expects the function value as a uintptr. | ||||
| 	ret := lib.Xpcre2_set_callout_8(r.tls, r.mctx, *(*uintptr)(unsafe.Pointer(&cfn)), 0) | ||||
| 	if ret < 0 { | ||||
| 		return codeToError(r.tls, ret) | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // replaceBytes replaces the bytes at a given location, and returns a new | ||||
| // offset, based on how much bigger or smaller the slice got after replacement | ||||
| func replaceBytes(src, repl []byte, sOff, eOff lib.Tsize_t, diff int64) (int64, []byte) { | ||||
| @@ -577,7 +696,7 @@ func (r *Regexp) match(b []byte, options uint32, multi bool) ([][]lib.Tsize_t, e | ||||
| 	if len(b) == 0 { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 	 | ||||
|  | ||||
| 	r.mtx.Lock() | ||||
| 	defer r.mtx.Unlock() | ||||
|  | ||||
| @@ -600,7 +719,7 @@ func (r *Regexp) match(b []byte, options uint32, multi bool) ([][]lib.Tsize_t, e | ||||
| 	// While the offset is less than the length of the subject | ||||
| 	for offset < cSubjectLen { | ||||
| 		// Execute expression on subject | ||||
| 		ret := lib.Xpcre2_match_8(r.tls, r.re, cSubject, cSubjectLen, offset, options, md, 0) | ||||
| 		ret := lib.Xpcre2_match_8(r.tls, r.re, cSubject, cSubjectLen, offset, options, md, r.mctx) | ||||
| 		if ret < 0 { | ||||
| 			// If no match found, break | ||||
| 			if ret == lib.DPCRE2_ERROR_NOMATCH { | ||||
| @@ -670,6 +789,8 @@ func (r *Regexp) Close() error { | ||||
|  | ||||
| 	// Free the compiled code | ||||
| 	lib.Xpcre2_code_free_8(r.tls, r.re) | ||||
| 	// Free the match context | ||||
| 	lib.Xpcre2_match_context_free_8(r.tls, r.mctx) | ||||
| 	// Set regular expression to null | ||||
| 	r.re = 0 | ||||
|  | ||||
|   | ||||
							
								
								
									
										55
									
								
								pcre_test.go
									
									
									
									
									
								
							
							
						
						
									
										55
									
								
								pcre_test.go
									
									
									
									
									
								
							| @@ -234,3 +234,58 @@ func TestString(t *testing.T) { | ||||
| 		t.Errorf("expected %s, got %s", expr, r.String()) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestCallout checks that a numbered callout placed at the end of the | ||||
| // pattern is executed during matching and that the fields of the callout | ||||
| // block handed to the Go callback describe the state of the match. | ||||
| func TestCallout(t *testing.T) { | ||||
| 	const expr = `(https?)://([.\w\d]+\.[\w\d]{2,4}[\w\d?&=%/.-]*)(?C2)` | ||||
| 	subject := "https://www.elara.ws/" | ||||
|  | ||||
| 	r := pcre.MustCompile(expr) | ||||
| 	defer r.Close() | ||||
|  | ||||
| 	executed := false | ||||
| 	err := r.SetCallout(func(cb *pcre.CalloutBlock) int32 { | ||||
| 		executed = true | ||||
|  | ||||
| 		if cb.CalloutNumber != 2 { | ||||
| 			t.Errorf("[CalloutNumber] expected %d, got %d", 2, cb.CalloutNumber) | ||||
| 		} | ||||
|  | ||||
| 		if cb.CaptureTop != 3 { | ||||
| 			t.Errorf("[CaptureTop] expected %d, got %d", 3, cb.CaptureTop) | ||||
| 		} | ||||
|  | ||||
| 		if cb.CaptureLast != 2 { | ||||
| 			t.Errorf("[CaptureLast] expected %d, got %d", 2, cb.CaptureLast) | ||||
| 		} | ||||
|  | ||||
| 		if cb.Subject != subject { | ||||
| 			t.Errorf("[Subject] expected %q, got %q", subject, cb.Subject) | ||||
| 		} | ||||
|  | ||||
| 		if cb.StartMatch != 0 { | ||||
| 			t.Errorf("[StartMatch] expected %d, got %d", 0, cb.StartMatch) | ||||
| 		} | ||||
|  | ||||
| 		// The callout sits at the very end of the pattern, so the whole | ||||
| 		// 21-byte subject has been consumed when it fires. | ||||
| 		if cb.CurrentPosition != 21 { | ||||
| 			t.Errorf("[CurrentPosition] expected %d, got %d", 21, cb.CurrentPosition) | ||||
| 		} | ||||
|  | ||||
| 		if cb.PatternPosition != 53 { | ||||
| 			t.Errorf("[PatternPosition] expected %d, got %d", 53, cb.PatternPosition) | ||||
| 		} | ||||
|  | ||||
| 		if cb.NextItemLength != 0 { | ||||
| 			t.Errorf("[NextItemLength] expected %d, got %d", 0, cb.NextItemLength) | ||||
| 		} | ||||
|  | ||||
| 		return 0 | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("SetCallout returned an error: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	m := r.MatchString(subject) | ||||
|  | ||||
| 	if !executed { | ||||
| 		t.Error("expected callout to be executed") | ||||
| 	} else if !m { | ||||
| 		t.Error("expected regular expression to match the string") | ||||
| 	} | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user