Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Inconsistency with SHWORDSPLIT and leading spaces



On Fri, 6 Nov 2015 11:54:49 +0100
Christian Neukirchen <chneukirchen@xxxxxxxxx> wrote:
> juno ~% zsh -c 'setopt shwordsplit; x=foo; echo x${x:+ $x}' 
> xfoo

This is unlikely to be deliberate --- we should do *something* with the
space, not pretend it's not there.

This makes already insane code even worse, but it doesn't seem to have
broken anything in the tests.

Possible breakage is I haven't spotted some case where we use multsub()
with word splitting but we need to reset the knock-on splitting
(ws_at_start) for some reason similar to transforming the result into a
length (though I've picked that particular one up).

pws

diff --git a/Src/subst.c b/Src/subst.c
index 021d234..5289121 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -430,11 +430,16 @@ singsub(char **s)
  * set to 1.  Otherwise, *isarr is set to 0, and the result is put into *s,
  * with any necessary joining of multiple elements using sep (which can be
  * NULL to use IFS).  The return value is true iff the expansion resulted
- * in an empty list. */
+ * in an empty list.
+ *
+ * *ws_at_start is set to 1 if the string had whitespace at thes start
+ * that should cause word splitting against any preceeding string.
+ */
 
 /**/
 static int
-multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep)
+multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
+	int *ws_at_start)
 {
     int l;
     char **r, **p, *x = *s;
@@ -450,6 +455,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep)
 	    l++;
 	    if (!iwsep(STOUC(c)))
 		break;
+	    *ws_at_start = 1;
 	}
     }
 
@@ -1717,6 +1723,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
      * This is for compatibility.
      */
     int horrible_offset_hack = 0;
+    /*
+     * Signal back from multsub: with something like
+     *   x${:- $foo}
+     * with word-splitting active we need to split on that leading
+     * whitespace.  However, if there's no "x" the whitespace is
+     * simply removed.
+     */
+    int ws_at_start = 0;
 
     *s++ = '\0';
     /*
@@ -2265,7 +2279,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	 * remove the aspar test and extract a value from an array, if
 	 * necessary, when we handle (P) lower down.
 	 */
-	if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
+	if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL,
+		    &ws_at_start) && quoted) {
 	    /* Empty quoted string --- treat as null string, not elided */
 	    isarr = -1;
 	    aval = (char **) hcalloc(sizeof(char *));
@@ -2736,7 +2751,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 		    split_flags = PREFORK_NOSHWORDSPLIT;
 		}
 		multsub(&val, split_flags, (aspar ? NULL : &aval),
-			&isarr, NULL);
+			&isarr, NULL, &ws_at_start);
 		copied = 1;
 		spbreak = 0;
 		/* Leave globsubst on if forced */
@@ -2765,13 +2780,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 		     * behavior on caller choice of PREFORK_SHWORDSPLIT. */
 		    multsub(&val,
 			    spbreak ? PREFORK_SINGLE : PREFORK_NOSHWORDSPLIT,
-			    NULL, &isarr, NULL);
+			    NULL, &isarr, NULL, &ws_at_start);
 		} else {
 		    if (spbreak)
 			split_flags = PREFORK_SPLIT|PREFORK_SHWORDSPLIT;
 		    else
 			split_flags = PREFORK_NOSHWORDSPLIT;
-		    multsub(&val, split_flags, &aval, &isarr, NULL);
+		    multsub(&val, split_flags, &aval, &isarr, NULL,
+			    &ws_at_start);
 		    spbreak = 0;
 		}
 		if (arrasg) {
@@ -3303,6 +3319,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	}
 	if (haserr || errflag)
 	    return NULL;
+	ws_at_start = 0;
     }
     /*
      * This handles taking a length with ${#foo} and variations.
@@ -3341,6 +3358,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	sprintf(buf, "%ld", len);
 	val = dupstring(buf);
 	isarr = 0;
+	ws_at_start = 0;
     }
     /* At this point we make sure that our arrayness has affected the
      * arrayness of the linked list.  Then, we can turn our value into
@@ -3370,6 +3388,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	if (isarr) {
 	    val = sepjoin(aval, sep, 1);
 	    isarr = 0;
+	    ws_at_start = 0;
 	}
 	if (!ssub && (spbreak || spsep)) {
 	    aval = sepsplit(val, spsep, 0, 1);
@@ -3649,6 +3668,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
      * equivalent and only locally decide if we need to treat it
      * as a scalar.)
      */
+
+    /*
+     * If a multsub result had whitespace at the start and we're
+     * splitting and there's a previous string, now's the time to do so.
+     */
+    if (ws_at_start && aptr > ostr) {
+	insertlinknode(l, n, dupstrpfx(ostr, aptr - ostr)), incnode(n);
+	ostr = aptr;
+    }
     if (isarr) {
 	char *x;
 	char *y;
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index cb7079c..59c14a4 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1744,3 +1744,57 @@
 >3_4_5_6
 >6
 >1_2_3_4_5_6
+
+ (setopt shwordsplit
+ do_test() {
+   print $#: "$@"
+ }
+ foo=bar
+ foo2="bar bar"
+ do_test ${:- foo}
+ do_test ${:- foo bar}
+ do_test ${:- $foo}
+ do_test ${:- $foo2}
+ do_test x${:- foo}
+ do_test x${:- foo bar}
+ do_test x${:- $foo}
+ do_test x${:- $foo2}
+ do_test x${foo:+ $foo}
+ )
+0:We Love SH_WORD_SPLIT Day celebrated with space at start of internal subst
+>1: foo
+>2: foo bar
+>1: bar
+>2: bar bar
+>2: x foo
+>3: x foo bar
+>2: x bar
+>3: x bar bar
+>2: x bar
+
+ (unsetopt shwordsplit # default, for clarity
+ do_test() {
+   print $#: "$@"
+ }
+ foo=bar
+ foo2="bar bar"
+ do_test ${:- foo}
+ do_test ${:- foo bar}
+ do_test ${:- $foo}
+ do_test ${:- $foo2}
+ do_test x${:- foo}
+ do_test x${:- foo bar}
+ do_test x${:- $foo}
+ do_test x${:- $foo2}
+ do_test x${foo:+ $foo}
+ )
+0:We Love NO_SH_WORD_SPLIT Even More Day celebrated as sanity check
+>1:  foo
+>1:  foo bar
+>1:  bar
+>1:  bar bar
+>1: x foo
+>1: x foo bar
+>1: x bar
+>1: x bar bar
+>1: x bar



Messages sorted by: Reverse Date, Date, Thread, Author