Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: start and end pattern assertions



We're missing one thing from standard regexps:  the ability to test whether
we're at the start or end of the string.  This is a pain when you want to
make sure something is a path segment but don't care whether it's at the
start or end of the path, or sandwiched between slashes.

The programming is easy, since the engine is essentially a standard regexp
one, but the syntax is more of a problem.  About the only thing which won't
cause grief somewhere else is to invent globbing flags to do the trick.
Hence I invented (#s), which is like `^' in regexps, and (#e), which is
like `$' in regexps.  Sorry this is a bit clumsy --- although they still
probably won't be used that often.  Now you can do e.g.
  print **/*~*((#s)|/)CVS((#e)|/)*(.)
(yuk) which will exclude things in CVS directories but not other stuff with
the string CVS in.  Actually you could just have ...CVS/*(.) in this case.

One other fix: `zsh -f ztst.zsh 11glob.ztst' didn't work because the
assumptions about the name of the script were too simple.  They still are,
but you can't have everything.

The largest change in the source code is actually just reindentation; the
functional changes are very minor since all the framework was there.

I won't commit this till I get home.  Beats writing hardware initialization
code I don't understand.

Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.3
diff -u -r1.3 expn.yo
--- Doc/Zsh/expn.yo	2000/04/05 19:29:15	1.3
+++ Doc/Zsh/expn.yo	2000/04/06 14:22:20
@@ -1299,6 +1299,17 @@
 Approximate matching: var(num) errors are allowed in the string matched by
 the pattern.  The rules for this are described in the next subsection.
 )
+item(tt(s), tt(e))(
+Unlike the other flags, these have only a local effect, and each must
+appear on its own:  `tt((#s))' and `tt((#e))' are the only valid forms.
+The `tt((#s))' flag succeeds only at the start of the test string, and the
+`tt((#e))' flag succeeds only at the end of the test string; they
+correspond to `tt(^)' and `tt($)' in standard regular expressions.  They
+are useful for matching path segments in patterns.  For example,
+`tt(*((#s)|/)test((#e)|/)*)' matches a path segment `tt(test)' in any of
+the following strings: tt(test), tt(test/at/start), tt(at/end/test),
+tt(in/test/middle).
+)
 enditem()
 
 For example, the test string tt(fooxx) can be matched by the pattern
Index: Misc/globtests
===================================================================
RCS file: /cvsroot/zsh/zsh/Misc/globtests,v
retrieving revision 1.1.1.7
diff -u -r1.1.1.7 globtests
--- Misc/globtests	1999/09/01 14:15:28	1.1.1.7
+++ Misc/globtests	2000/04/06 14:22:20
@@ -167,5 +167,17 @@
 f read.me       (#ia1)README~READ.ME
 t read.me       (#ia1)README~READ_ME
 f read.me       (#ia1)README~(#a1)READ_ME
+t test          *((#s)|/)test((#e)|/)*
+t test/path     *((#s)|/)test((#e)|/)*
+t path/test     *((#s)|/)test((#e)|/)*
+t path/test/ohyes *((#s)|/)test((#e)|/)*
+f atest         *((#s)|/)test((#e)|/)*
+f testy         *((#s)|/)test((#e)|/)*
+f testy/path    *((#s)|/)test((#e)|/)*
+f path/atest    *((#s)|/)test((#e)|/)*
+f atest/path    *((#s)|/)test((#e)|/)*
+f path/testy    *((#s)|/)test((#e)|/)*
+f path/testy/ohyes *((#s)|/)test((#e)|/)*
+f path/atest/ohyes *((#s)|/)test((#e)|/)*
 EOT
 print "$failed tests failed."
Index: Src/pattern.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/pattern.c,v
retrieving revision 1.3
diff -u -r1.3 pattern.c
--- Src/pattern.c	2000/04/04 01:16:25	1.3
+++ Src/pattern.c	2000/04/06 14:22:22
@@ -83,6 +83,8 @@
 #define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
 #define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
 #define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
+#define P_ISSTART 0x09  /* no   Match start of string. */
+#define P_ISEND   0x0a  /* no   Match end of string. */
 /* numbered so we can test bit 5 for a branch */
 #define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
 #define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
@@ -645,34 +647,44 @@
 	    /* Globbing flags. */
 	    char *pp1 = patparse;
 	    int oldglobflags = patglobflags;
+	    long assert;
 	    patparse += (*patparse == '@') ? 3 : 2;
-	    if (!patgetglobflags(&patparse))
-		return 0;	    
-	    if (pp1 == patstart) {
-		/* Right at start of pattern, the simplest case.
-		 * Put them into the flags and don't emit anything.
-		 */
-		((Patprog)patout)->globflags = patglobflags;
-		continue;
-	    } else if (!*patparse) {
-		/* Right at the end, so just leave the flags for
-		 * the next Patprog in the chain to pick up.
+	    if (!patgetglobflags(&patparse, &assert))
+		return 0;
+	    if (assert) {
+		/*
+		 * Start/end assertion looking like flags, but
+		 * actually handled as a normal node
 		 */
-		break;
-	    }
-	    /*
-	     * Otherwise, we have to stick them in as a pattern
-	     * matching nothing.
-	     */
-	    if (oldglobflags != patglobflags) {
-		/* Flags changed */
-		union upat up;
-		latest = patnode(P_GFLAGS);
-		up.l = patglobflags;
-		patadd((char *)&up, 0, sizeof(union upat), 0);
+		latest = patnode(assert);
+		flags = 0;
 	    } else {
-		/* No effect. */
-		continue;
+		if (pp1 == patstart) {
+		    /* Right at start of pattern, the simplest case.
+		     * Put them into the flags and don't emit anything.
+		     */
+		    ((Patprog)patout)->globflags = patglobflags;
+		    continue;
+		} else if (!*patparse) {
+		    /* Right at the end, so just leave the flags for
+		     * the next Patprog in the chain to pick up.
+		     */
+		    break;
+		}
+		/*
+		 * Otherwise, we have to stick them in as a pattern
+		 * matching nothing.
+		 */
+		if (oldglobflags != patglobflags) {
+		    /* Flags changed */
+		    union upat up;
+		    latest = patnode(P_GFLAGS);
+		    up.l = patglobflags;
+		    patadd((char *)&up, 0, sizeof(union upat), 0);
+		} else {
+		    /* No effect. */
+		    continue;
+		}
 	    }
 	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
 	    /*
@@ -707,10 +719,12 @@
 
 /**/
 int
-patgetglobflags(char **strp)
+patgetglobflags(char **strp, long *assertp)
 {
     char *nptr, *ptr = *strp;
     zlong ret;
+
+    *assertp = 0;
     /* (#X): assumes we are still positioned on the first X */
     for (; *ptr && *ptr != Outpar; ptr++) {
 	switch (*ptr) {
@@ -763,12 +777,23 @@
 	    patglobflags &= ~GF_MATCHREF;
 	    break;
 
+	case 's':
+	    *assertp = P_ISSTART;
+	    break;
+
+	case 'e':
+	    *assertp = P_ISEND;
+	    break;
+
 	default:
 	    return 0;
 	}
     }
     if (*ptr != Outpar)
 	return 0;
+    /* Start/end assertions must appear on their own. */
+    if (*assertp && (*strp)[1] != Outpar)
+	return 0;
     *strp = ptr + 1;
     return 1;
 }
@@ -1989,6 +2014,14 @@
 	     * anything here.
 	     */
 	    return 0;
+	case P_ISSTART:
+	    if (patinput != patinstart)
+		fail = 1;
+	    break;
+	case P_ISEND:
+	    if (*patinput)
+		fail = 1;
+	    break;
 	case P_END:
 	    if (!(fail = (*patinput && !(patflags & PAT_NOANCH))))
 		return 1;
@@ -2386,6 +2419,12 @@
 	break;
     case P_GFLAGS:
 	p = "GFLAGS";
+	break;
+    case P_ISSTART:
+	p = "ISSTART";
+	break;
+    case P_ISEND:
+	p = "ISEND";
 	break;
     case P_NOTHING:
 	p = "NOTHING";
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.1.1.53
diff -u -r1.1.1.53 subst.c
--- Src/subst.c	2000/03/01 04:10:39	1.1.1.53
+++ Src/subst.c	2000/04/06 14:22:23
@@ -1975,7 +1975,7 @@
 		if (*ptr1) {
 		    zsfree(hsubl);
 		    hsubl = ztrdup(ptr1);
-		}
+ 		}
 		if (!hsubl) {
 		    zerr("no previous substitution", NULL, 0);
 		    return;
Index: Test/11glob.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/11glob.ztst,v
retrieving revision 1.2
diff -u -r1.2 11glob.ztst
--- Test/11glob.ztst	2000/04/01 20:49:48	1.2
+++ Test/11glob.ztst	2000/04/06 14:22:24
@@ -162,6 +162,18 @@
 >1:  [[ read.me = (#ia1)README~READ.ME ]]
 >0:  [[ read.me = (#ia1)README~READ_ME ]]
 >1:  [[ read.me = (#ia1)README~(#a1)READ_ME ]]
+>0:  [[ test = *((#s)|/)test((#e)|/)* ]]
+>0:  [[ test/path = *((#s)|/)test((#e)|/)* ]]
+>0:  [[ path/test = *((#s)|/)test((#e)|/)* ]]
+>0:  [[ path/test/ohyes = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ atest = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ testy = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ testy/path = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/atest = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ atest/path = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/testy = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]]
+>1:  [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]]
 >0 tests failed.
 
   globtest globtests.ksh
Index: Test/ztst.zsh
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/ztst.zsh,v
retrieving revision 1.1.1.12
diff -u -r1.1.1.12 ztst.zsh
--- Test/ztst.zsh	2000/03/24 12:48:02	1.1.1.12
+++ Test/ztst.zsh	2000/04/06 14:22:24
@@ -47,8 +47,13 @@
 ZTST_testdir=$PWD
 ZTST_testname=$1
 
-# The source directory is not necessarily the current directory
-ZTST_srcdir=${0%/*}
+# The source directory is not necessarily the current directory,
+# but if $0 doesn't contain a `/' assume it is.
+if [[ $0 = */* ]]; then
+  ZTST_srcdir=${0%/*}
+else
+  ZTST_srcdir=$PWD
+fi
 [[ $ZTST_srcdir = /* ]] || ZTST_srcdir="$ZTST_testdir/$ZTST_srcdir"
 
 # Set the function autoload paths to correspond to this build of zsh.

pws



Messages sorted by: Reverse Date, Date, Thread, Author