Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Bug with bash emulation regarding ':'



On Sun, 05 Feb 2012 13:21:46 -0800
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> (I wonder if changing the boolean "split" parameter to multsub() into a
> set of bitflags would suffice.)

It took some thinking about, but I think the following is about the most
minimal code.  Even allowing for the fact that explicit splitting is
separate from sh-wordsplitting (it's a stronger condition), it needed a
third flag to distinguish the cases...

- SHWORDSPLIT is on normally.
- SHWORDSPLIT isn't on, but is required by a recursive substitution.
- SHWORDSPLIT needs to be forced off by a recursive substitution.

At least I think so.

There's now no temporary setting of SHWORDSPLIT itself and the special
flags are only tested on the way through to paramsubst(), so command
substitution doesn't see them.

I don't know if this helps with the other issues you mentioned.

Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.130
diff -p -u -r1.130 subst.c
--- Src/subst.c	21 Dec 2011 22:39:28 -0000	1.130
+++ Src/subst.c	10 Feb 2012 20:54:23 -0000
@@ -74,7 +74,10 @@ prefork(LinkList list, int flags)
 	     */
 	    setdata(node, cptr);
 	}
-	if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
+	if (!(node = stringsubst(list, node,
+				 flags & (PF_SINGLE|PF_SPLIT|
+					  PF_SHWORDSPLIT|PF_NOSHWORDSPLIT),
+				 asssub))) {
 	    unqueue_signals();
 	    return;
 	}
@@ -145,7 +148,7 @@ stringsubstquote(char *strstart, char **
 
 /**/
 static LinkNode
-stringsubst(LinkList list, LinkNode node, int ssub, int asssub)
+stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 {
     int qt;
     char *str3 = (char *)getdata(node);
@@ -213,7 +216,11 @@ stringsubst(LinkList list, LinkNode node
 		setdata(node, (void *) str3);
 		continue;
 	    } else {
-		node = paramsubst(list, node, &str, qt, ssub);
+		if ((isset(SHWORDSPLIT) && !(pf_flags & PF_NOSHWORDSPLIT)) ||
+		    (pf_flags & PF_SPLIT))
+		    pf_flags |= PF_SHWORDSPLIT;
+		node = paramsubst(list, node, &str, qt,
+				  pf_flags & (PF_SINGLE|PF_SHWORDSPLIT));
 		if (errflag || !node)
 		    return NULL;
 		str3 = (char *)getdata(node);
@@ -268,7 +275,7 @@ stringsubst(LinkList list, LinkNode node
 		       (qt && str[1] == '"'))))
 		    *str = ztokens[c - Pound];
 	    str++;
-	    if (!(pl = getoutput(str2 + 1, qt || ssub))) {
+	    if (!(pl = getoutput(str2 + 1, qt || (pf_flags & PF_SINGLE)))) {
 		zerr("parse error in command substitution");
 		return NULL;
 	    }
@@ -278,7 +285,7 @@ stringsubst(LinkList list, LinkNode node
 		str = strcpy(str2, str);
 		continue;
 	    }
-	    if (!qt && ssub && isset(GLOBSUBST))
+	    if (!qt && (pf_flags & PF_SINGLE) && isset(GLOBSUBST))
 		shtokenize(s);
 	    l1 = str2 - str3;
 	    l2 = strlen(s);
@@ -306,7 +313,7 @@ stringsubst(LinkList list, LinkNode node
 	     * We are in a normal argument which looks like an assignment
 	     * and is to be treated like one, with no word splitting.
 	     */
-	    ssub = 1;
+	    pf_flags |= PF_SINGLE;
 	}
 	str++;
     }
@@ -392,13 +399,13 @@ singsub(char **s)
 
 /**/
 static int
-multsub(char **s, int split, char ***a, int *isarr, char *sep)
+multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep)
 {
     int l;
     char **r, **p, *x = *s;
     local_list1(foo);
 
-    if (split) {
+    if (pf_flags & PF_SPLIT) {
 	/*
 	 * This doesn't handle multibyte characters, but we're
 	 * looking for whitespace separators which must be ASCII.
@@ -413,7 +420,7 @@ multsub(char **s, int split, char ***a, 
 
     init_list1(foo, x);
 
-    if (split) {
+    if (pf_flags & PF_SPLIT) {
 	LinkNode n = firstnode(&foo);
 	int inq = 0, inp = 0;
 	MB_METACHARINIT();
@@ -467,7 +474,7 @@ multsub(char **s, int split, char ***a, 
 	}
     }
 
-    prefork(&foo, 0);
+    prefork(&foo, pf_flags);
     if (errflag) {
 	if (isarr)
 	    *isarr = 0;
@@ -1437,7 +1444,7 @@ check_colon_subscript(char *str, char **
 
 /**/
 static LinkNode
-paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
+paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 {
     char *aptr = *str, c, cc;
     char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
@@ -1514,7 +1521,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * where we shouldn't, in particular on the multsubs for
      * handling embedded values for ${...=...} and the like.
      */
-    int spbreak = isset(SHWORDSPLIT) && !ssub && !qt;
+    int spbreak = (pf_flags & PF_SHWORDSPLIT) && !(pf_flags & PF_SINGLE) && !qt;
     /* Scalar and array value, see isarr above */
     char *val = NULL, **aval = NULL;
     /*
@@ -1564,6 +1571,11 @@ paramsubst(LinkList l, LinkNode n, char 
      */
     int shsplit = 0;
     /*
+     * "ssub" is true when we are called from singsub (via prefork):
+     * it means that we must join arrays and should not split words.
+     */
+    int ssub = (pf_flags & PF_SINGLE);
+    /*
      * The separator from (j) and (s) respectively, or (F) and (f)
      * respectively (hardwired to "\n" in that case).  Slightly
      * confusingly also used for ${#pm}, thought that's at least
@@ -1620,7 +1632,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * This is one of the things that decides whether multsub
      * will produce an array, but in an extremely indirect fashion.
      */
-    int nojoin = isset(SHWORDSPLIT) ? !(ifs && *ifs) : 0;
+    int nojoin = (pf_flags & PF_SHWORDSPLIT) ? !(ifs && *ifs) : 0;
     /*
      * != 0 means ${...}, otherwise $...  What works without braces
      * is largely a historical artefact (everything works with braces,
@@ -2618,7 +2630,7 @@ paramsubst(LinkList l, LinkNode n, char 
 	/* Fall Through! */
 	case '-':
 	    if (vunset) {
-		int ws = opts[SHWORDSPLIT];
+		int split_flags;
 		val = dupstring(s);
 		/* If word-splitting is enabled, we ask multsub() to split
 		 * the substituted string at unquoted whitespace.  Then, we
@@ -2627,9 +2639,20 @@ paramsubst(LinkList l, LinkNode n, char 
 		 * keep its array splits, and weird constructs such as
 		 * ${str+"one two" "3 2 1" foo "$str"} to only be split
 		 * at the unquoted spaces. */
-		opts[SHWORDSPLIT] = spbreak;
-		multsub(&val, spbreak && !aspar, (aspar ? NULL : &aval), &isarr, NULL);
-		opts[SHWORDSPLIT] = ws;
+		if (spbreak) {
+		    split_flags = PF_SHWORDSPLIT;
+		    if (!aspar)
+			split_flags |= PF_SPLIT;
+		} else {
+		    /*
+		     * It's not good enough not passing the flag to use
+		     * SHWORDSPLIT, because when we get to a nested
+		     * paramsubst we need to ignore isset(SHWORDSPLIT).
+		     */
+		    split_flags = PF_NOSHWORDSPLIT;
+		}
+		multsub(&val, split_flags, (aspar ? NULL : &aval),
+			&isarr, NULL);
 		copied = 1;
 		spbreak = 0;
 		/* Leave globsubst on if forced */
@@ -2647,21 +2670,21 @@ paramsubst(LinkList l, LinkNode n, char 
 	case '=':
 	case Equals:
 	    if (vunset) {
-		int ws = opts[SHWORDSPLIT];
 		char sav = *idend;
-		int l;
+		int l, split_flags;
 
 		*idend = '\0';
 		val = dupstring(s);
 		if (spsep || !arrasg) {
-		    opts[SHWORDSPLIT] = 0;
-		    multsub(&val, 0, NULL, &isarr, NULL);
+		    multsub(&val, PF_NOSHWORDSPLIT, NULL, &isarr, NULL);
 		} else {
-		    opts[SHWORDSPLIT] = spbreak;
-		    multsub(&val, spbreak, &aval, &isarr, NULL);
+		    if (spbreak)
+			split_flags = PF_SPLIT|PF_SHWORDSPLIT;
+		    else
+			split_flags = PF_NOSHWORDSPLIT;
+		    multsub(&val, split_flags, &aval, &isarr, NULL);
 		    spbreak = 0;
 		}
-		opts[SHWORDSPLIT] = ws;
 		if (arrasg) {
 		    /* This is an array assignment. */
 		    char *arr[2], **t, **a, **p;
@@ -3118,8 +3141,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * (afterward) may split the joined value (e.g. (s:-:) sets "spsep").  One
      * exception is that ${name:-word} and ${name:+word} will have already
      * done any requested splitting of the word value with quoting preserved.
-     * "ssub" is true when we are called from singsub (via prefork):
-     * it means that we must join arrays and should not split words. */
+     */
     if (ssub || (spbreak && isarr >= 0) || spsep || sep) {
 	if (isarr) {
 	    val = sepjoin(aval, sep, 1);
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.179
diff -p -u -r1.179 zsh.h
--- Src/zsh.h	8 Jan 2012 16:02:55 -0000	1.179
+++ Src/zsh.h	10 Feb 2012 20:54:23 -0000
@@ -1648,6 +1648,9 @@ enum {
 #define PF_TYPESET	0x01	/* argument handled like typeset foo=bar */
 #define PF_ASSIGN	0x02	/* argument handled like the RHS of foo=bar */
 #define PF_SINGLE	0x04	/* single word substitution */
+#define PF_SPLIT        0x08    /* explicitly split nested substitution */
+#define PF_SHWORDSPLIT  0x10    /* SHWORDSPLIT in parameter expn */
+#define PF_NOSHWORDSPLIT 0x20   /* SHWORDSPLIT forced off in nested subst */
 
 /*
  * Structure for adding parameters in a module.
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.60
diff -p -u -r1.60 D04parameter.ztst
--- Test/D04parameter.ztst	17 Aug 2011 19:00:10 -0000	1.60
+++ Test/D04parameter.ztst	10 Feb 2012 20:54:23 -0000
@@ -255,6 +255,20 @@
 >two
 >words
 
+  (setopt shwordsplit # ensure this doesn't get set in main shell...
+  test_splitting ()
+  {
+    array="one two three"
+    for e in $array; do
+      echo "'$e'"
+    done
+  }
+  test_split_var=
+  : ${test_split_var:=$(test_splitting)}
+  echo "_${test_split_var}_")
+0:SH_WORD_SPLIT inside $(...) inside ${...}
+>_'one' 'two' 'three'_
+
   print -l "${(f)$(print first line\\nsecond line\\nthird line)}"
 0:${(f)$(...)}
 >first line

-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author