Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: pcre callouts



PCRE supports callouts similar to Perl's (?{ code }) but with different
syntax. There are string and numeric formats, and it seems logical
enough to evaluate the string forms as shell code.

So, e.g., (?C{foo}) or (?C'foo') will call the foo function. In Perl,
$_ is set to the string being examined; I've used .pcre.subject instead.
Would something else be better? And should it perhaps start and end a new
scope to make that parameter local? As in Perl, the return status can be
used to make PCRE treat the callout as not matching.

This won't do anything for numeric callouts. They look mostly useful for
debugging. They could perhaps call a standard function passing the
number and string as parameters.

Oliver

diff --git a/Doc/Zsh/mod_pcre.yo b/Doc/Zsh/mod_pcre.yo
index da73ac85a..41fab4475 100644
--- a/Doc/Zsh/mod_pcre.yo
+++ b/Doc/Zsh/mod_pcre.yo
@@ -69,6 +69,11 @@ print -l $accum)
 )
 enditem()
 
+If the regular expression contains string callouts, these are executed as shell
+code; numeric callouts are ignored. During execution of a callout, the subject
+string being matched is available in the parameter tt(.pcre.subject). If the
+shell code returns a non-zero status, the callout does not match.
+
 The option tt(-d) uses the alternative breadth-first DFA search algorithm of
 pcre. This sets tt(match), or the array given with tt(-a), to all the matches
 found from the same start point in the subject.
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index f5cda6d38..e321f18a4 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -128,6 +128,30 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
     return 0;
 }
 
+static int
+pcre_callout(pcre2_callout_block_8 *block, UNUSED(void *callout_data))
+{
+    Eprog prog;
+    int ret=0;	/* stays 0 when nothing is run, so the callout matches */
+    /* Run string callouts (callout_number == 0) as shell code; skip the rest */
+    if (!block->callout_number &&
+	    ((prog = parse_string((char *) block->callout_string, 0))))
+    {
+	int ef = errflag, lv = lastval;
+	/* Expose the subject being matched to the shell code */
+	setsparam(".pcre.subject",
+		metafy((char *) block->subject, block->subject_length, META_DUP));
+	execode(prog, 1, 0, "pcre");
+	ret = lastval | errflag;
+	/* A non-zero status or error from the code makes the callout not match */
+	/* Restore prior state, keeping any user interrupt raised meanwhile */
+	errflag = ef | (errflag & ERRFLAG_INT);
+	lastval = lv;
+    }
+
+    return ret;
+}
+
 static int
 zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata,
 	int captured_count, char *matchvar, char *substravar, char *namedassoc,
@@ -339,6 +363,9 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
     plaintext = ztrdup(*args);
     unmetafy(plaintext, &subject_len);
 
+    pcre2_match_context_8 *mcontext = pcre2_match_context_create(NULL);
+    pcre2_set_callout(mcontext, &pcre_callout, 0);
+
     if (offset_start > 0 && offset_start >= subject_len)
 	ret = PCRE2_ERROR_NOMATCH;
     else if (use_dfa) {
@@ -347,7 +374,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
 	pcre_mdata = pcre2_match_data_create(capcount, NULL);
 	do {
 	    ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
-		offset_start, 0, pcre_mdata, NULL, (int *) workspace, wscount);
+		offset_start, 0, pcre_mdata, mcontext, (int *) workspace, wscount);
 	    if (ret == PCRE2_ERROR_DFA_WSSIZE) {
 		old = wscount;
 		wscount += wscount / 2;
@@ -362,7 +389,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
     } else {
 	pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL);
 	ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
-		offset_start, 0, pcre_mdata, NULL);
+		offset_start, 0, pcre_mdata, mcontext);
     }
 
     if (ret==0) return_value = 0;
@@ -380,6 +407,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
     
     if (pcre_mdata)
 	pcre2_match_data_free(pcre_mdata);
+    if (mcontext)
+	pcre2_match_context_free(mcontext);
     zsfree(plaintext);
 
     return return_value;




Messages sorted by: Reverse Date, Date, Thread, Author