Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: [^ax-y] doesn't work but [^x-ya] does



On Sat, 4 Mar 2017 15:21:43 -0800
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> On Mar 2, 11:53am, Peter Stephenson wrote:
> }
> } Here's the brute force approach --- it passes all tests, but I bet
> } there's more fall out to come, and it's all over the place, so I'm not
> } sure if there might be a better way.
> 
> Maybe the better way is a macro along the lines of those in ztype.h?
> Perhaps idash(X) ?   Doesn't change the number of places in the code
> that have to be touched, but perhaps cleaner.

A bit neater; I've avoided it looking too much the itype macros as it's
a bit different, explicitly looking for either the raw or tokenized form
rather than a type.  But I suppose it would be straightforward to mark
both with a bit; I don't think there's a huge difference either way.

I may just commit this and see how it works.

pws

diff --git a/Src/cond.c b/Src/cond.c
index 8ab0193..9b739f6 100644
--- a/Src/cond.c
+++ b/Src/cond.c
@@ -138,13 +138,13 @@ evalcond(Estate state, char *fromtest)
 		strs = arrdup(sbuf);
 		l = 2;
 	    }
-	    if (name && name[0] == '-')
+	    if (name && IS_DASH(name[0]))
 		errname = name;
-	    else if (strs[0] && *strs[0] == '-')
+	    else if (strs[0] && IS_DASH(*strs[0]))
 		errname = strs[0];
 	    else
 		errname = "<null>";
-	    if (name && name[0] == '-' &&
+	    if (name && IS_DASH(name[0]) &&
 		(cd = getconddef((ctype == COND_MODI), name + 1, 1))) {
 		if (ctype == COND_MOD &&
 		    (l < cd->min || (cd->max >= 0 && l > cd->max))) {
@@ -171,7 +171,7 @@ evalcond(Estate state, char *fromtest)
 		strs[0] = dupstring(name);
 		name = s;
 
-		if (name && name[0] == '-' &&
+		if (name && IS_DASH(name[0]) &&
 		    (cd = getconddef(0, name + 1, 1))) {
 		    if (l < cd->min || (cd->max >= 0 && l > cd->max)) {
 			zwarnnam(fromtest, "unknown condition: %s",
diff --git a/Src/exec.c b/Src/exec.c
index 83d1513..3a8c268 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -2779,9 +2779,10 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 		char *argdata = (char *) getdata(argnode);
 		char *cmdopt;
 		int has_p = 0, has_vV = 0, has_other = 0;
-		while (*argdata == '-') {
+		while (IS_DASH(*argdata)) {
 		    /* Just to be definite, stop on single "-", too, */
-		    if (!argdata[1] || (argdata[1] == '-' && !argdata[2]))
+		    if (!argdata[1] ||
+			(IS_DASH(argdata[1]) && !argdata[2]))
 			break;
 		    for (cmdopt = argdata+1; *cmdopt; cmdopt++) {
 			switch (*cmdopt) {
@@ -2835,7 +2836,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 		 * as if this is command [non-option-stuff].  This
 		 * isn't a good place for standard option handling.
 		 */
-		if (!strcmp(argdata, "--"))
+		if (IS_DASH(argdata[0]) && IS_DASH(argdata[1]) && !argdata[2])
 		     uremnode(args, firstnode(args));
 	    }
 	    if ((cflags & BINF_EXEC) && nextnode(firstnode(args))) {
@@ -2855,7 +2856,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 		 * people aren't likely to mix the option style
 		 * with the zsh style.
 		 */
-		while (next && *next == '-' && strlen(next) >= 2) {
+		while (next && IS_DASH(*next) && strlen(next) >= 2) {
 		    if (!firstnode(args)) {
 			zerr("exec requires a command to execute");
 			lastval = 1;
@@ -2863,7 +2864,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 			goto done;
 		    }
 		    uremnode(args, firstnode(args));
-		    if (!strcmp(next, "--"))
+		    if (IS_DASH(next[0]) && IS_DASH(next[1]) && !next[2])
 			break;
 		    for (cmdopt = &next[1]; *cmdopt; ++cmdopt) {
 			switch (*cmdopt) {
diff --git a/Src/glob.c b/Src/glob.c
index ff6b258..87127e1 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -1314,6 +1314,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 		    sense ^= 1;
 		    break;
 		case '-':
+		case Dash:
 		    /* Toggle matching of symbolic links */
 		    sense ^= 2;
 		    break;
@@ -1608,7 +1609,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 			    ++s;
 		    }
 		    /* See if it's greater than, equal to, or less than */
-		    if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0))
+		    if ((g_range = *s == '+' ? 1 : IS_DASH(*s) ? -1 : 0))
 			++s;
 		    data = qgetnum(&s);
 		    break;
@@ -2025,13 +2026,13 @@ hasbraces(char *str)
 		if (bracechardots(str-1, NULL, NULL))
 		    return 1;
 		lbr = str - 1;
-		if (*str == '-')
+		if (IS_DASH(*str))
 		    str++;
 		while (idigit(*str))
 		    str++;
 		if (*str == '.' && str[1] == '.') {
 		    str++; str++;
-		    if (*str == '-')
+		    if (IS_DASH(*str))
 			str++;
 		    while (idigit(*str))
 			str++;
@@ -2040,7 +2041,7 @@ hasbraces(char *str)
 			return 1;
 		    else if (*str == '.' && str[1] == '.') {
 			str++; str++;
-			if (*str == '-')
+			if (IS_DASH(*str))
 			    str++;
 			while (idigit(*str))
 			    str++;
@@ -2123,7 +2124,7 @@ xpandredir(struct redir *fn, LinkList redirtab)
 	fn->name = s;
 	untokenize(s);
 	if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) {
-	    if (s[0] == '-' && !s[1])
+	    if (IS_DASH(s[0]) && !s[1])
 		fn->type = REDIR_CLOSE;
 	    else if (s[0] == 'p' && !s[1])
 		fn->fd2 = -2;
@@ -2329,12 +2330,14 @@ xpandbraces(LinkList list, LinkNode *np)
 	     * str+1 is the first number in the range, dots+2 the last,
 	     * and dots2+2 is the increment if that's given. */
 	    /* TODO: sorry about this */
-	    int minw = (str[1] == '0' || (str[1] == '-' && str[2] == '0'))
+	    int minw = (str[1] == '0' ||
+			(IS_DASH(str[1]) && str[2] == '0'))
 		       ? wid1
-		       : (dots[2] == '0' || (dots[2] == '-' && dots[3] == '0'))
+		       : (dots[2] == '0' ||
+			  (IS_DASH(dots[2]) && dots[3] == '0'))
 		       ? wid2
 		       : (dots2 && (dots2[2] == '0' ||
-				    (dots2[2] == '-' && dots2[3] == '0')))
+				    (IS_DASH(dots2[2]) && dots2[3] == '0')))
 		       ? wid3
 		       : 0;
 	    if (rincr < 0) {
@@ -2392,7 +2395,7 @@ xpandbraces(LinkList list, LinkNode *np)
 		c2 = ztokens[c2 - STOUC(Pound)];
 	    if ((char) c2 == Meta)
 		c2 = 32 ^ p[1];
-	    if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) {
+	    if (IS_DASH(c1) && lastch >= 0 && p < str2 && lastch <= (int)c2) {
 		while (lastch < (int)c2)
 		    ccl[lastch++] = 1;
 		lastch = -1;
@@ -3528,7 +3531,7 @@ zshtokenize(char *s, int flags)
 	    }
 	    t = s;
 	    while (idigit(*++s));
-	    if (*s != '-')
+	    if (!IS_DASH(*s))
 		goto cont;
 	    while (idigit(*++s));
 	    if (*s != '>')
diff --git a/Src/lex.c b/Src/lex.c
index 8896128..59e9d14 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -1359,17 +1359,13 @@ gettokstr(int c, int sub)
 	case LX2_DASH:
 	    /*
 	     * - shouldn't be treated as a special character unless
-	     * we're in a pattern.  Howeve,simply  counting "[" doesn't
-	     * work as []a-z] is a valid expression and we don't know
-	     * down here what this "[" is for as $foo[stuff] is valid
-	     * in zsh.  So just detect an opening [, which is enough
-	     * to turn this into a pattern; the Dash will be harmlessly
-	     * untokenised if not wanted.
+	     * we're in a pattern.  Unfortunately, working out for
+	     * sure in complicated expressions whether we're in a
+	     * pattern is tricky.  So we'll make it special and
+	     * turn it back any time we don't need it special.
+	     * This is not ideal as it's a lot of work.
 	     */
-	    if (seen_brct)
-		c = Dash;
-           else
-               c = '-';
+	    c = Dash;
            break;
        case LX2_BANG:
            /*
diff --git a/Src/math.c b/Src/math.c
index f19c0ed..f961300 100644
--- a/Src/math.c
+++ b/Src/math.c
@@ -463,7 +463,7 @@ lexconstant(void)
     char *nptr;
 
     nptr = ptr;
-    if (*nptr == '-')
+    if (IS_DASH(*nptr))
 	nptr++;
 
     if (*nptr == '0') {
@@ -527,7 +527,7 @@ lexconstant(void)
 	}
 	if (*nptr == 'e' || *nptr == 'E') {
 	    nptr++;
-	    if (*nptr == '+' || *nptr == '-')
+	    if (*nptr == '+' || IS_DASH(*nptr))
 		nptr++;
 	    while (idigit(*nptr) || *nptr == '_')
 		nptr++;
@@ -599,7 +599,8 @@ zzlex(void)
 	    }
 	    return (unary) ? UPLUS : PLUS;
 	case '-':
-	    if (*ptr == '-') {
+	case Dash:
+	    if (IS_DASH(*ptr)) {
 		ptr++;
 		return (unary) ? PREMINUS : POSTMINUS;
 	    }
diff --git a/Src/parse.c b/Src/parse.c
index 699ea49..6fe283d 100644
--- a/Src/parse.c
+++ b/Src/parse.c
@@ -2317,6 +2317,19 @@ par_cond_1(void)
 }
 
 /*
+ * Return 1 if condition matches.  This also works for non-elided options.
+ *
+ * input is test string, may begin - or Dash.
+ * cond is condition following the -.
+ */
+static int check_cond(const char *input, const char *cond)
+{
+    if (!IS_DASH(input[0]))
+	return 0;
+    return !strcmp(input + 1, cond);
+}
+
+/*
  * cond_2	: BANG cond_2
 				| INPAR { SEPER } cond_2 { SEPER } OUTPAR
 				| STRING STRING STRING
@@ -2342,7 +2355,7 @@ par_cond_2(void)
 	    s1 = tokstr;
 	    condlex();
 	    /* ksh behavior: [ -t ] means [ -t 1 ]; bash disagrees */
-	    if (unset(POSIXBUILTINS) && !strcmp(s1, "-t"))
+	    if (unset(POSIXBUILTINS) && check_cond(s1, "t"))
 		return par_cond_double(s1, dupstring("1"));
 	    return par_cond_double(dupstring("-n"), s1);
 	}
@@ -2352,7 +2365,7 @@ par_cond_2(void)
 	    if (!strcmp(*testargs, "=")  ||
 		!strcmp(*testargs, "==") ||
 		!strcmp(*testargs, "!=") ||
-		(**testargs == '-' && get_cond_num(*testargs + 1) >= 0)) {
+		(IS_DASH(**testargs) && get_cond_num(*testargs + 1) >= 0)) {
 		s1 = tokstr;
 		condlex();
 		s2 = tokstr;
@@ -2374,8 +2387,8 @@ par_cond_2(void)
 	 * In "test" compatibility mode, "! -a ..." and "! -o ..."
 	 * are treated as "[string] [and] ..." and "[string] [or] ...".
 	 */
-	if (!(n_testargs > 1 &&
-	      (!strcmp(*testargs, "-a") || !strcmp(*testargs, "-o"))))
+	if (!(n_testargs > 1 && (check_cond(*testargs, "a") ||
+				 check_cond(*testargs, "o"))))
 	{
 	    condlex();
 	    ecadd(WCB_COND(COND_NOT, 0));
@@ -2397,7 +2410,7 @@ par_cond_2(void)
 	return r;
     }
     s1 = tokstr;
-    dble = (s1 && *s1 == '-'
+    dble = (s1 && IS_DASH(*s1)
 	    && (!n_testargs
 		|| strspn(s1+1, "abcdefghknoprstuvwxzLONGS") == 1)
 	    && !s1[2]);
@@ -2411,7 +2424,7 @@ par_cond_2(void)
 	    YYERROR(ecused);
     }
     condlex();
-    if (n_testargs == 2 && tok != STRING && tokstr && s1[0] == '-') {
+    if (n_testargs == 2 && tok != STRING && tokstr && IS_DASH(s1[0])) {
 	/*
 	 * Something like "test -z" followed by a token.
 	 * We'll turn the token into a string (we've also
@@ -2446,9 +2459,9 @@ par_cond_2(void)
 	} else
 	    YYERROR(ecused);
     }
-    s2 = tokstr;   
+    s2 = tokstr;
     if (!n_testargs)
-	dble = (s2 && *s2 == '-' && !s2[2]);
+	dble = (s2 && IS_DASH(*s2) && !s2[2]);
     incond++;			/* parentheses do globbing */
     do condlex(); while (COND_SEP());
     incond--;			/* parentheses do grouping */
@@ -2476,7 +2489,7 @@ par_cond_2(void)
 static int
 par_cond_double(char *a, char *b)
 {
-    if (a[0] != '-' || !a[1])
+    if (!IS_DASH(a[0]) || !a[1])
 	COND_ERROR("parse error: condition expected: %s", a);
     else if (!a[2] && strspn(a+1, "abcdefgknoprstuvwxzhLONGS") == 1) {
 	ecadd(WCB_COND(a[1], 0));
@@ -2534,7 +2547,7 @@ par_cond_triple(char *a, char *b, char *c)
 	ecadd(WCB_COND(COND_REGEX, 0));
 	ecstr(a);
 	ecstr(c);
-    } else if (b[0] == '-') {
+    } else if (IS_DASH(b[0])) {
 	if ((t0 = get_cond_num(b + 1)) > -1) {
 	    ecadd(WCB_COND(t0 + COND_NT, 0));
 	    ecstr(a);
@@ -2545,7 +2558,7 @@ par_cond_triple(char *a, char *b, char *c)
 	    ecstr(a);
 	    ecstr(c);
 	}
-    } else if (a[0] == '-' && a[1]) {
+    } else if (IS_DASH(a[0]) && a[1]) {
 	ecadd(WCB_COND(COND_MOD, 2));
 	ecstr(a);
 	ecstr(b);
@@ -2560,7 +2573,7 @@ par_cond_triple(char *a, char *b, char *c)
 static int
 par_cond_multi(char *a, LinkList l)
 {
-    if (a[0] != '-' || !a[1])
+    if (!IS_DASH(a[0]) || !a[1])
 	COND_ERROR("condition expected: %s", a);
     else {
 	LinkNode n;
@@ -3256,10 +3269,10 @@ build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)
     for (hlen = FD_PRELEN, tlen = 0; *files; files++) {
 	struct stat st;
 
-	if (!strcmp(*files, "-k")) {
+	if (check_cond(*files, "k")) {
 	    flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_KSHLOAD;
 	    continue;
-	} else if (!strcmp(*files, "-z")) {
+	} else if (check_cond(*files, "z")) {
 	    flags = (flags & ~(FDHF_KSHLOAD | FDHF_ZSHLOAD)) | FDHF_ZSHLOAD;
 	    continue;
 	}
diff --git a/Src/pattern.c b/Src/pattern.c
index 928790f..75db016 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -1521,7 +1521,7 @@ patcomppiece(int *flagp, int paren)
 		patparse = nptr;
 		len |= 1;
 	    }
-	    DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
+	    DPUTS(!IS_DASH(*patparse), "BUG: - missing from numeric glob");
 	    patparse++;
 	    if (idigit(*patparse)) {
 		to = (zrange_t) zstrtol((char *)patparse,
diff --git a/Src/subst.c b/Src/subst.c
index 02dbe28..2214b3d 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -481,6 +481,8 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
 	for ( ; *x; x += l) {
 	    int rawc = -1;
 	    convchar_t c;
+	    if (*x == Dash)
+		*x = '-';
 	    if (itok(STOUC(*x))) {
 		/* token, can't be separator, must be single byte */
 		rawc = *x;
@@ -1766,7 +1768,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      */
     c = *s;
     if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
-	c != '-' && c != '!' && c != '$' && c != String && c != Qstring &&
+	!IS_DASH(c) &&
+	c != '!' && c != '$' && c != String && c != Qstring &&
 	c != '?' && c != Quest &&
 	c != '*' && c != Star && c != '@' && c != '{' &&
 	c != Inbrace && c != '=' && c != Equals && c != Hat &&
@@ -1895,13 +1898,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 		    if (quotetype == QT_DOLLARS ||
 			quotetype == QT_BACKSLASH_PATTERN)
 			goto flagerr;
-		    if (s[1] == '-' || s[1] == '+') {
+		    if (IS_DASH(s[1]) || s[1] == '+') {
 			if (quotemod)
 			    goto flagerr;
 			s++;
 			quotemod = 1;
-			quotetype = (*s == '-') ? QT_SINGLE_OPTIONAL :
-			    QT_QUOTEDZPUTS;
+			quotetype = (*s == '+') ? QT_QUOTEDZPUTS :
+			    QT_SINGLE_OPTIONAL;
 		    } else {
 			if (quotetype == QT_SINGLE_OPTIONAL) {
 			    /* extra q's after '-' not allowed */
@@ -2208,9 +2211,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 		     * properly in the first place we wouldn't
 		     * have this nonsense.
 		     */
-		    || ((cc == '#' || cc == Pound) &&
-			s[2] == Outbrace)
-		    || cc == '-' || (cc == ':' && s[2] == '-')
+		    || ((cc == '#' || cc == Pound) && s[2] == Outbrace)
+		    || IS_DASH(cc)
+		    || (cc == ':' && IS_DASH(s[2]))
 		    || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
 	    getlen = 1 + whichlen, s++;
 	    /*
@@ -2605,14 +2608,17 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      * Again, this duplicates tests for characters we're about to
      * examine properly later on.
      */
-    if (inbrace &&
-	(c = *s) != '-' && c != '+' && c != ':' && c != '%'  && c != '/' &&
-	c != '=' && c != Equals &&
-	c != '#' && c != Pound &&
-	c != '?' && c != Quest &&
-	c != '}' && c != Outbrace) {
-	zerr("bad substitution");
-	return NULL;
+    if (inbrace) {
+	c = *s;
+	if (!IS_DASH(c) &&
+	    c != '+' && c != ':' && c != '%'  && c != '/' &&
+	    c != '=' && c != Equals &&
+	    c != '#' && c != Pound &&
+	    c != '?' && c != Quest &&
+	    c != '}' && c != Outbrace) {
+	    zerr("bad substitution");
+	    return NULL;
+	}
     }
     /*
      * Join arrays up if we're in quotes and there isn't some
@@ -2690,8 +2696,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
     /* Check for ${..?..} or ${..=..} or one of those. *
      * Only works if the name is in braces.            */
 
-    if (inbrace && ((c = *s) == '-' ||
-		    c == '+' ||
+    if (inbrace && ((c = *s) == '+' ||
+		    IS_DASH(c) ||
 		    c == ':' ||	/* i.e. a doubled colon */
 		    c == '=' || c == Equals ||
 		    c == '%' ||
@@ -2802,6 +2808,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	    vunset = 1;
 	/* Fall Through! */
 	case '-':
+	case Dash:
 	    if (vunset) {
 		int split_flags;
 		val = dupstring(s);
diff --git a/Src/utils.c b/Src/utils.c
index 7f3ddad..9669944 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -2376,7 +2376,7 @@ zstrtol_underscore(const char *s, char **t, int base, int underscore)
     while (inblank(*s))
 	s++;
 
-    if ((neg = (*s == '-')))
+    if ((neg = IS_DASH(*s)))
 	s++;
     else if (*s == '+')
 	s++;
@@ -6118,7 +6118,9 @@ quotedzputs(char const *s, FILE *stream)
 	} else
 	    *ptr++ = '\'';
 	while(*s) {
-	    if (*s == Meta)
+	    if (*s == Dash)
+		c = '-';
+	    else if (*s == Meta)
 		c = *++s ^ 32;
 	    else
 		c = *s;
@@ -6155,7 +6157,9 @@ quotedzputs(char const *s, FILE *stream)
     } else {
 	/* use Bourne-style quoting, avoiding empty quoted strings */
 	while (*s) {
-	    if (*s == Meta)
+	    if (*s == Dash)
+		c = '-';
+	    else if (*s == Meta)
 		c = *++s ^ 32;
 	    else
 		c = *s;
diff --git a/Src/zsh.h b/Src/zsh.h
index f2c2790..ce41b17 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -238,6 +238,16 @@ struct mathfunc {
 #define PATCHARS "#^*()|[]<>?~\\"
 
 /*
+ * Check for a possibly tokenized dash.
+ *
+ * A dash only needs to be a token in a character range, [a-z], but
+ * it's difficult in general to ensure that.  So it's turned into
+ * a token at the usual point in the lexer.  However, we need
+ * to check for a literal dash at many opints.
+ */
+#define IS_DASH(x) ((x) == '-' || (x) == Dash)
+
+/*
  * Types of quote.  This is used in various places, so care needs
  * to be taken when changing them.  (Oooh, don't you look surprised.)
  * - Passed to quotestring() to indicate style.  This is the ultimate
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 1385d57..413381f 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -686,3 +686,9 @@
  rm glob.tmp/link
 0:modifier ':P' resolves symlinks before '..' components
 *>*glob.tmp/hello/world
+
+ foo=a
+ value="ac"
+ print ${value//[${foo}b-z]/x}
+0:handling of - range in complicated pattern context
+>xx



Messages sorted by: Reverse Date, Date, Thread, Author