Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: fixing ${1+"$@"} when word-splitting



On Sun, Feb 12, 2006 at 08:26:17PM +0000, Peter Stephenson wrote:
> Frankly, it's such a mess at the moment that I think any practical
> improvement is a good thing.

OK.  Here's something even better.  This appears to make us totally
compatible with bash.  I did the following:

I added an extra arg to multsub() that lets the caller request that the
string be split on unquoted whitespace (as defined by IFS) before being
parsed.  The code that handles ${...+...} uses this new flag, and then
disables spbreak to prevent any further splitting.  While I was at it,
I fixed the comment for multsub() (it needed to be updated for when it
returns an array vs a string).

Because the docs say that using ${=foo} splits like SH_WORD_SPLIT, I
made an explicit '=' work the same way (yeah, I'm waffling on this
issue).  For example:

  % set 1 '2 3 4' 5 '6 7'
  % for w in ${=1+"$@" 'extra words' 'and such here'}; echo $w
  1
  2 3 4
  5
  6 7
  extra words
  and such here
  % for w in ${==1+more "$@" 'extra words' 'and such here'}; echo $w
  more 1
  2 3 4
  5
  6 7 extra words and such here

My checking for quoted whitespace currently honors Dnull, Snull, Tick,
Inpar, and Outpar.  Is that everything?

Attached is the patch (which also includes the good changes from my last
email).  Let me know how you like it.

..wayne..
--- Src/subst.c	6 Feb 2006 11:57:06 -0000	1.44
+++ Src/subst.c	13 Feb 2006 10:39:31 -0000
@@ -295,29 +295,76 @@ singsub(char **s)
     DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!");
 }
 
-/* Perform substitution on a single word. Unlike with singsub, the      *
- * result can have more than one word. A single word result is stored   *
- * in *s and *isarr is set to zero; otherwise *isarr is set to 1 and    *
- * the result is stored in *a. If `a' is zero a multiple word result is *
- * joined using sep or the IFS parameter if sep is zero and the result  *
- * is returned in *s.  The return value is true iff the expansion       *
- * resulted in an empty list.                                           *
- * The mult_isarr variable is used by paramsubst() to tell if it yields *
- * an array.                                                            */
+/* Perform substitution on a single word, *s. Unlike with singsub(), the
+ * result can be more than one word. If split is non-zero, the string is
+ * first word-split using IFS, but only for non-quoted whitespace (as
+ * indicated by Snull and Dnull chars).
+ *
+ * If "a" was non-NULL, the resulting string(s) are stored in *a (even for a
+ * 1-element array) and *isarr is set to 1.  Otherwise, *isarr is set to 0,
+ * and the result is stored in *s, with multiple elements joined together
+ * using sep (which can be NULL to use IFS).  The return value is true iff
+ * the expansion resulted in an empty list.
+ *
+ * The mult_isarr variable is used by paramsubst() to tell if it yields an
+ * array. */
 
 /**/
 static int mult_isarr;
 
 /**/
 static int
-multsub(char **s, char ***a, int *isarr, UNUSED(char *sep))
+multsub(char **s, int split, char ***a, int *isarr, char *sep)
 {
     int l, omi = mult_isarr;
-    char **r, **p;
+    char **r, **p, *x = *s;
     local_list1(foo);
 
     mult_isarr = 0;
-    init_list1(foo, *s);
+
+    if (split) {
+	for ( ; *x; x += l+1) {
+	    char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
+	    if (!iwsep(c))
+		break;
+	}
+    }
+
+    init_list1(foo, x);
+
+    if (split) {
+	LinkNode n = firstnode(&foo);
+	int inq = 0, inp = 0;
+	for ( ; *x; x += l+1) {
+	    char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
+	    if (!inq && !inp && isep(c)) {
+		*x++ = '\0';
+		for (x += l; *x; x += l+1) {
+		    c = (l = *x == Meta) ? x[1] ^ 32 : *x;
+		    if (!isep(c))
+			break;
+		}
+		if (!*x)
+		    break;
+		insertlinknode(&foo, n, (void *)x), incnode(n);
+	    }
+	    switch (c) {
+	    case Dnull:
+	    case Snull:
+	    case Tick:
+		/* These always occur in unnested pairs. */
+		inq = !inq;
+		break;
+	    case Inpar:
+		inp++;
+		break;
+	    case Outpar:
+		inp--;
+		break;
+	    }
+	}
+    }
+
     prefork(&foo, 0);
     if (errflag) {
 	if (isarr)
@@ -325,7 +372,7 @@ multsub(char **s, char ***a, int *isarr,
 	mult_isarr = omi;
 	return 0;
     }
-    if ((l = countlinknodes(&foo))) {
+    if ((l = countlinknodes(&foo)) > (a ? 0 : 1)) {
 	p = r = hcalloc((l + 1) * sizeof(char*));
 	while (nonempty(&foo))
 	    *p++ = (char *)ugetnode(&foo);
@@ -345,7 +392,7 @@ multsub(char **s, char ***a, int *isarr,
 	    mult_isarr = omi;
 	    return 0;
 	}
-	*s = sepjoin(r, NULL, 1);
+	*s = sepjoin(r, sep, 1);
 	mult_isarr = omi;
 	if (isarr)
 	    *isarr = 0;
@@ -1457,7 +1504,7 @@ paramsubst(LinkList l, LinkNode n, char 
 	 * remove the aspar test and extract a value from an array, if
 	 * necessary, when we handle (P) lower down.
 	 */
-	if (multsub(&val, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
+	if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
 	    /* Empty quoted string --- treat as null string, not elided */
 	    isarr = -1;
 	    aval = (char **) hcalloc(sizeof(char *));
@@ -1993,25 +2040,16 @@ paramsubst(LinkList l, LinkNode n, char 
 	case '-':
 	    if (vunset) {
 		val = dupstring(s);
-		/*
-		 * This is not good enough for sh emulation!  Sh would
-		 * split unquoted substrings, yet not split quoted ones
-		 * (except according to $@ rules); but this leaves the
-		 * unquoted substrings unsplit, and other code below
-		 * for spbreak splits even within the quoted substrings.
-		 *
-		 * TODO: I think multsub needs to be told enough to
-		 * decide about splitting with spbreak at this point
-		 * (and equally in the `=' handler below).  Then
-		 * we can turn off spbreak to avoid the join & split
-		 * nastiness later.
-		 *
-		 * What we really want to do is make this look as
-		 * if it were the result of an assignment from
-		 * the same value, taking account of quoting.
-		 */
-		multsub(&val, (aspar ? NULL : &aval), &isarr, NULL);
+		/* If word-splitting is enabled, we ask multsub() to split
+		 * the substituted string at unquoted whitespace.  Then, we
+		 * turn off spbreak so that no further splitting occurs.
+		 * This allows a construct such as ${1+"$@"} to correctly
+		 * keep its array splits, and weird constructs such as
+		 * ${str+"one two" "3 2 1" foo "$str"} to only be split
+		 * at the unquoted spaces. */
+		multsub(&val, spbreak && !aspar, (aspar ? NULL : &aval), &isarr, NULL);
 		copied = 1;
+		spbreak = 0;
 	    }
 	    break;
 	case ':':
@@ -2029,7 +2067,6 @@ paramsubst(LinkList l, LinkNode n, char 
 
 		*idend = '\0';
 		val = dupstring(s);
-		isarr = 0;
 		/*
 		 * TODO: this is one of those places where I don't
 		 * think we want to do the joining until later on.
@@ -2037,9 +2074,9 @@ paramsubst(LinkList l, LinkNode n, char 
 		 * point and unset them.
 		 */
 		if (spsep || spbreak || !arrasg)
-		    multsub(&val, NULL, NULL, sep);
+		    multsub(&val, 0, NULL, &isarr, NULL);
 		else
-		    multsub(&val, &aval, &isarr, NULL);
+		    multsub(&val, 0, &aval, &isarr, NULL);
 		if (arrasg) {
 		    /*
 		     * This is an array assignment in a context
--- Test/D04parameter.ztst	11 Oct 2005 16:48:06 -0000	1.13
+++ Test/D04parameter.ztst	13 Feb 2006 10:23:51 -0000
@@ -547,20 +547,16 @@
 >shell
 >again
 
-  set If "this test fails" maybe "we have finally fixed" the shell
+  set Make sure "this test keeps" on "preserving all" quoted whitespace
   print -l ${=1+"$@"}
 0:Regression test of unfixed ${=1+"$@"} bug
->If
->this
->test
->fails
->maybe
->we
->have
->finally
->fixed
->the
->shell
+>Make
+>sure
+>this test keeps
+>on
+>preserving all
+>quoted
+>whitespace
 
   unset SHLVL
   (( SHLVL++ ))


Messages sorted by: Reverse Date, Date, Thread, Author