Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: nested ${(P)} (formerly SHWORDSPLIT and leading spaces)



On Sun, 8 Nov 2015 18:18:33 +0000
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx> wrote:
> - we spot on the way down this is a multsub when we reached the nested
> paramsubst() (we could do that with a pf_flags bit)
> 
> - we return a name of a parameter and a flag saying what we're doing on
> noticing this
> 
> - we pass this up to the paramsubst() regardless of what else is going
> on in the prefork() and the multsub() --- this is the bit I'm a little scared
> of, but (P) is so explicit in what it's doing maybe that's OK
> 
> - in the upper paramsubst we use the flag to retrieve a Value based on
> the return, i.e. it now really is just a name for use up above as you
> might have been entitled to think it always was --- this is surely doable
> but I bet it's messier than you'd expect

This isn't obviously broken, yet.  You can explore while I think about
tests.

To get this to work, I simply welded the parameter name from down below
together with the remainder of the expression in the outer
substitution.  It seems pretty much guaranteed to work, though it also
seems somehow unsatisfying.

In theory this should clearly separate transformations that happen on
the name you're going to pass up, and those on the result after name
lookup.

Hmmm... should I sanity check that the substituted name is an
identifier, or is it a feature that

% array=(one two three)
% word='array[2]'
% print ${${(P)word}[2]}
w

works?  If the latter, is there any sanity checking I can do?

pws

diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index 20e0c8d..4c373d1 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1033,7 +1033,16 @@ var(name) used in this fashion.
 If used with a nested parameter or command substitution, the result of that
 will be taken as a parameter name in the same way.  For example, if you
 have `tt(foo=bar)' and `tt(bar=baz)', the strings tt(${(P)foo}),
-tt(${(P)${foo}}), and tt(${(P)$(echo bar)}) will be expanded to `tt(baz)'.
+tt(${(P)${foo}}), and tt(${(P)$(echo bar)}) will be expanded to
+`tt(baz)'.
+
+Likewise, if the reference is itself nested, the expression with the
+flag is treated as if it were directly replaced by the parameter name.
+It is an error if this nested substitution produces an array with more
+than one word.  For example, if `tt(name=assoc)' where the parameter
+tt(assoc) is an associative array, then
+`tt(${${(P)name}[elt]})' refers to the element of the associative
+array subscripted `tt(elt)'.
 )
 item(tt(q))(
 Quote characters that are special to the shell in the resulting words with
diff --git a/Src/Zle/compctl.c b/Src/Zle/compctl.c
index bac533e..8381867 100644
--- a/Src/Zle/compctl.c
+++ b/Src/Zle/compctl.c
@@ -2116,7 +2116,7 @@ getreal(char *str)
 
     noerrs = 1;
     addlinknode(l, dupstring(str));
-    prefork(l, 0);
+    prefork(l, 0, NULL);
     noerrs = ne;
     if (!errflag && nonempty(l) &&
 	((char *) peekfirst(l)) && ((char *) peekfirst(l))[0])
@@ -3728,7 +3728,7 @@ makecomplistflags(Compctl cc, char *s, int incmd, int compadd)
 	errflag &= ~ERRFLAG_ERROR;
 	zcontext_restore();
 	/* Fine, now do full expansion. */
-	prefork(foo, 0);
+	prefork(foo, 0, NULL);
 	if (!errflag) {
 	    globlist(foo, 0);
 	    if (!errflag)
diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c
index e26f663..4e68549 100644
--- a/Src/Zle/zle_tricky.c
+++ b/Src/Zle/zle_tricky.c
@@ -2223,7 +2223,7 @@ doexpansion(char *s, int lst, int olst, int explincmd)
         else if (*ts == '\'')
             *ts = Snull;
     addlinknode(vl, ss);
-    prefork(vl, 0);
+    prefork(vl, 0, NULL);
     if (errflag)
 	goto end;
     if (lst == COMP_LIST_EXPAND || lst == COMP_EXPAND) {
diff --git a/Src/cond.c b/Src/cond.c
index df90656..c5ab65e 100644
--- a/Src/cond.c
+++ b/Src/cond.c
@@ -43,7 +43,7 @@ static void cond_subst(char **strp, int glob_ok)
 	checkglobqual(*strp, strlen(*strp), 1, NULL)) {
 	LinkList args = newlinklist();
 	addlinknode(args, *strp);
-	prefork(args, 0);
+	prefork(args, 0, NULL);
 	while (!errflag && args && nonempty(args) &&
 	       has_token((char *)peekfirst(args)))
 	    zglob(args, firstnode(args), 0);
diff --git a/Src/exec.c b/Src/exec.c
index f0d1d2f..c0ee527 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -2290,7 +2290,7 @@ addvars(Estate state, Wordcode pc, int addflags)
 
 	if (vl && htok) {
 	    prefork(vl, (isstr ? (PREFORK_SINGLE|PREFORK_ASSIGN) :
-			 PREFORK_ASSIGN));
+			 PREFORK_ASSIGN), NULL);
 	    if (errflag) {
 		state->pc = opc;
 		return;
@@ -2416,7 +2416,7 @@ void
 execsubst(LinkList strs)
 {
     if (strs) {
-	prefork(strs, esprefork);
+	prefork(strs, esprefork, NULL);
 	if (esglob && !errflag) {
 	    LinkList ostrs = strs;
 	    globlist(strs, 0);
@@ -2721,7 +2721,7 @@ execcmd(Estate state, int input, int output, int how, int last1)
     /* Do prefork substitutions */
     esprefork = (assign || isset(MAGICEQUALSUBST)) ? PREFORK_TYPESET : 0;
     if (args && htok)
-	prefork(args, esprefork);
+	prefork(args, esprefork, NULL);
 
     if (type == WC_SIMPLE || type == WC_TYPESET) {
 	int unglobbed = 0;
@@ -3558,7 +3558,7 @@ execcmd(Estate state, int input, int output, int how, int last1)
 				 */
 				/* Unused dummy value for name */
 				(void)ecgetstr(state, EC_DUPTOK, &htok);
-				prefork(&svl, PREFORK_TYPESET);
+				prefork(&svl, PREFORK_TYPESET, NULL);
 				if (errflag) {
 				    state->pc = opc;
 				    break;
@@ -3584,7 +3584,7 @@ execcmd(Estate state, int input, int output, int how, int last1)
 				}
 				continue;
 			    }
-			    prefork(&svl, PREFORK_SINGLE);
+			    prefork(&svl, PREFORK_SINGLE, NULL);
 			    name = empty(&svl) ? "" :
 				(char *)getdata(firstnode(&svl));
 			}
@@ -3600,7 +3600,9 @@ execcmd(Estate state, int input, int output, int how, int last1)
 			    } else {
 				if (htok) {
 				    init_list1(svl, val);
-				    prefork(&svl, PREFORK_SINGLE|PREFORK_ASSIGN);
+				    prefork(&svl,
+					    PREFORK_SINGLE|PREFORK_ASSIGN,
+					    NULL);
 				    if (errflag) {
 					state->pc = opc;
 					break;
@@ -3622,7 +3624,7 @@ execcmd(Estate state, int input, int output, int how, int last1)
 					  EC_DUPTOK, &htok);
 			    if (asg->value.array)
 			    {
-				prefork(asg->value.array, PREFORK_ASSIGN);
+				prefork(asg->value.array, PREFORK_ASSIGN, NULL);
 				if (errflag) {
 				    state->pc = opc;
 				    break;
diff --git a/Src/glob.c b/Src/glob.c
index 51ffeb5..94b3f62 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2093,7 +2093,7 @@ xpandredir(struct redir *fn, LinkList redirtab)
     /* Stick the name in a list... */
     init_list1(fake, fn->name);
     /* ...which undergoes all the usual shell expansions */
-    prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE);
+    prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE, NULL);
     /* Globbing is only done for multios. */
     if (!errflag && isset(MULTIOS))
 	globlist(&fake, 0);
diff --git a/Src/subst.c b/Src/subst.c
index febdc9b..f3a4ad4 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -44,15 +44,23 @@ char nulstring[] = {Nularg, '\0'};
  *  - Brace expansion
  *  - Tilde and equals substitution
  *
- * PREFORK_* flags are defined in zsh.h
+ * "flags" contains PREFORK_* flags, defined in zsh.h.
+ *
+ * "ret_flags" is used to return values from nested parameter
+ * substitutions.  It may be NULL in which case PREFORK_SUBEXP
+ * must not appear in flags; any return value from below
+ * will be discarded.
  */
 
 /**/
 mod_export void
-prefork(LinkList list, int flags)
+prefork(LinkList list, int flags, int *ret_flags)
 {
     LinkNode node, stop = 0;
     int keep = 0, asssub = (flags & PREFORK_TYPESET) && isset(KSHTYPESET);
+    int ret_flags_local = 0;
+    if (!ret_flags)
+	ret_flags = &ret_flags_local; /* will be discarded */
 
     queue_signals();
     for (node = firstnode(list); node; incnode(node)) {
@@ -75,10 +83,8 @@ prefork(LinkList list, int flags)
 	    setdata(node, cptr);
 	}
 	if (!(node = stringsubst(list, node,
-				 flags & (PREFORK_SINGLE|PREFORK_SPLIT|
-					  PREFORK_SHWORDSPLIT|
-					  PREFORK_NOSHWORDSPLIT),
-				 asssub))) {
+				 flags & ~(PREFORK_TYPESET|PREFORK_ASSIGN),
+				 ret_flags, asssub))) {
 	    unqueue_signals();
 	    return;
 	}
@@ -149,7 +155,8 @@ stringsubstquote(char *strstart, char **pstrdpos)
 
 /**/
 static LinkNode
-stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
+stringsubst(LinkList list, LinkNode node, int pf_flags, int *ret_flags,
+	    int asssub)
 {
     int qt;
     char *str3 = (char *)getdata(node);
@@ -235,7 +242,8 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub)
 		    pf_flags |= PREFORK_SHWORDSPLIT;
 		node = paramsubst(
 		    list, node, &str, qt,
-		    pf_flags & (PREFORK_SINGLE|PREFORK_SHWORDSPLIT));
+		    pf_flags & (PREFORK_SINGLE|PREFORK_SHWORDSPLIT|
+				PREFORK_SUBEXP), ret_flags);
 		if (errflag || !node)
 		    return NULL;
 		str3 = (char *)getdata(node);
@@ -413,29 +421,13 @@ singsub(char **s)
 
     init_list1(foo, *s);
 
-    prefork(&foo, PREFORK_SINGLE);
+    prefork(&foo, PREFORK_SINGLE, NULL);
     if (errflag)
 	return;
     *s = (char *) ugetnode(&foo);
     DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!");
 }
 
-/*
- * Bit flags passed back from multsub() to paramsubst().
- */
-enum {
-    /*
-     * Set if the string had whitespace at the start
-     * that should cause word splitting against any preceeding string.
-     */
-    WS_AT_START = 1,
-    /*
-     * Set if the string had whitespace at the end
-     * that should cause word splitting against any following string.
-     */
-    WS_AT_END = 2
-};
-
 /* Perform substitution on a single word, *s. Unlike with singsub(), the
  * result can be more than one word. If split is non-zero, the string is
  * first word-split using IFS, but only for non-quoted "whitespace" (as
@@ -448,13 +440,13 @@ enum {
  * NULL to use IFS).  The return value is true iff the expansion resulted
  * in an empty list.
  *
- * *ws_at_start is set to bits in the enum above as neeed.
+ * *ms_flags is set to MULTSUB_* bits (see zsh.h) as needed.
  */
 
 /**/
 static int
 multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
-	int *ws_sub)
+	int *ms_flags)
 {
     int l;
     char **r, **p, *x = *s;
@@ -470,7 +462,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
 	    l++;
 	    if (!iwsep(STOUC(c)))
 		break;
-	    *ws_sub |= WS_AT_START;
+	    *ms_flags |= MULTSUB_WS_AT_START;
 	}
     }
 
@@ -503,7 +495,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
 			    break;
 		    }
 		    if (!*x) {
-			*ws_sub |= WS_AT_END;
+			*ms_flags |= MULTSUB_WS_AT_END;
 			break;
 		    }
 		    insertlinknode(&foo, n, (void *)x), incnode(n);
@@ -532,7 +524,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
 	}
     }
 
-    prefork(&foo, pf_flags);
+    prefork(&foo, pf_flags, ms_flags);
     if (errflag) {
 	if (isarr)
 	    *isarr = 0;
@@ -1517,7 +1509,8 @@ check_colon_subscript(char *str, char **endp)
 
 /**/
 static LinkNode
-paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
+paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
+	   int *ret_flags)
 {
     char *aptr = *str, c, cc;
     char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
@@ -1747,7 +1740,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
      * whitespace.  However, if there's no "x" the whitespace is
      * simply removed.
      */
-    int ws_sub = 0;
+    int ms_flags = 0;
 
     *s++ = '\0';
     /*
@@ -2296,8 +2289,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	 * remove the aspar test and extract a value from an array, if
 	 * necessary, when we handle (P) lower down.
 	 */
-	if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL,
-		    &ws_sub) && quoted) {
+	if (multsub(&val, PREFORK_SUBEXP, (aspar ? NULL : &aval), &isarr, NULL,
+		    &ms_flags) && quoted) {
 	    /* Empty quoted string --- treat as null string, not elided */
 	    isarr = -1;
 	    aval = (char **) hcalloc(sizeof(char *));
@@ -2311,6 +2304,28 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	 */
 	while (inull(*s))
 	    s++;
+	if (ms_flags & MULTSUB_PARAM_NAME) {
+	    /*
+	     * Downbelow has told us this is a parameter name, e.g.
+	     * ${${(P)name}...}.  We're going to behave as if
+	     * we have exactly that name followed by the rest of
+	     * the parameter for subscripting etc.
+	     *
+	     * See below for where we set the flag in the nested
+	     * substitution.
+	     */
+	    if (isarr) {
+		if (aval[1]) {
+		    zerr("parameter name reference used with array");
+		    return NULL;
+		}
+		val = aval[0];
+		isarr = 0;
+	    }
+	    s = dyncat(val, s);
+	    /* Now behave po-faced as if it was always like that... */
+	    subexp = aspar = 0;
+	}
 	v = (Value) NULL;
     } else if (aspar) {
 	/*
@@ -2328,13 +2343,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	} else
 	    vunset = 1;
     }
+    if (aspar && (pf_flags & PREFORK_SUBEXP)) {
+	/*
+	 * This is the inner handling for the case referred to above
+	 * where we have something like ${${(P)name}...}.
+	 *
+	 * Treat this as a normal value here; all transformations on
+	 * the result are in the outer instance.
+	 */
+	aspar = 0;
+	*ret_flags |= MULTSUB_PARAM_NAME;
+    }
     /*
      * We need to retrieve a value either if we haven't already
      * got it from a subexpression, or if the processing so
      * far has just yielded us a parameter name to be processed
      * with (P).
      */
-    if (!subexp || aspar) {
+    else if (!subexp || aspar) {
 	char *ov = val;
 
 	/*
@@ -2768,7 +2794,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 		    split_flags = PREFORK_NOSHWORDSPLIT;
 		}
 		multsub(&val, split_flags, (aspar ? NULL : &aval),
-			&isarr, NULL, &ws_sub);
+			&isarr, NULL, &ms_flags);
 		copied = 1;
 		spbreak = 0;
 		/* Leave globsubst on if forced */
@@ -2797,14 +2823,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 		     * behavior on caller choice of PREFORK_SHWORDSPLIT. */
 		    multsub(&val,
 			    spbreak ? PREFORK_SINGLE : PREFORK_NOSHWORDSPLIT,
-			    NULL, &isarr, NULL, &ws_sub);
+			    NULL, &isarr, NULL, &ms_flags);
 		} else {
 		    if (spbreak)
 			split_flags = PREFORK_SPLIT|PREFORK_SHWORDSPLIT;
 		    else
 			split_flags = PREFORK_NOSHWORDSPLIT;
 		    multsub(&val, split_flags, &aval, &isarr, NULL,
-			    &ws_sub);
+			    &ms_flags);
 		    spbreak = 0;
 		}
 		if (arrasg) {
@@ -3336,7 +3362,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	}
 	if (haserr || errflag)
 	    return NULL;
-	ws_sub = 0;
+	ms_flags = 0;
     }
     /*
      * This handles taking a length with ${#foo} and variations.
@@ -3375,7 +3401,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	sprintf(buf, "%ld", len);
 	val = dupstring(buf);
 	isarr = 0;
-	ws_sub = 0;
+	ms_flags = 0;
     }
     /* At this point we make sure that our arrayness has affected the
      * arrayness of the linked list.  Then, we can turn our value into
@@ -3405,7 +3431,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 	if (isarr) {
 	    val = sepjoin(aval, sep, 1);
 	    isarr = 0;
-	    ws_sub = 0;
+	    ms_flags = 0;
 	}
 	if (!ssub && (spbreak || spsep)) {
 	    aval = sepsplit(val, spsep, 0, 1);
@@ -3690,12 +3716,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
      * If a multsub result had whitespace at the start and we're
      * splitting and there's a previous string, now's the time to do so.
      */
-    if ((ws_sub & WS_AT_START) && aptr > ostr) {
+    if ((ms_flags & MULTSUB_WS_AT_START) && aptr > ostr) {
 	insertlinknode(l, n, dupstrpfx(ostr, aptr - ostr)), incnode(n);
 	ostr = aptr;
     }
     /* Likewise at the end */
-    if ((ws_sub & WS_AT_END) && *fstr) {
+    if ((ms_flags & MULTSUB_WS_AT_END) && *fstr) {
 	insertlinknode(l, n, dupstring(fstr)); /* appended, no incnode */
 	*fstr = '\0';
     }
@@ -3777,7 +3803,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags)
 
 	    *--fstr = Marker;
 	    init_list1(tl, fstr);
-	    if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0))
+	    if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, ret_flags, 0))
 		return NULL;
 	    *str = aptr;
 	    tn = firstnode(&tl);
diff --git a/Src/zsh.h b/Src/zsh.h
index a6f0397..d3bfcef 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1866,18 +1866,45 @@ enum {
 };
 
 /* Flags as the second argument to prefork */
-/* argument handled like typeset foo=bar */
-#define PREFORK_TYPESET	        0x01
-/* argument handled like the RHS of foo=bar */
-#define PREFORK_ASSIGN	        0x02
-/* single word substitution */
-#define PREFORK_SINGLE	        0x04
-/* explicitly split nested substitution */
-#define PREFORK_SPLIT           0x08
-/* SHWORDSPLIT in parameter expn */
-#define PREFORK_SHWORDSPLIT     0x10
-/* SHWORDSPLIT forced off in nested subst */
-#define PREFORK_NOSHWORDSPLIT   0x20
+enum {
+    /* argument handled like typeset foo=bar */
+    PREFORK_TYPESET       = 0x01,
+    /* argument handled like the RHS of foo=bar */
+    PREFORK_ASSIGN        = 0x02,
+    /* single word substitution */
+    PREFORK_SINGLE        = 0x04,
+    /* explicitly split nested substitution */
+    PREFORK_SPLIT         = 0x08,
+    /* SHWORDSPLIT in parameter expn */
+    PREFORK_SHWORDSPLIT   = 0x10,
+    /* SHWORDSPLIT forced off in nested subst */
+    PREFORK_NOSHWORDSPLIT = 0x20,
+    /* Prefork is part of a parameter subexpression */
+    PREFORK_SUBEXP        = 0x40
+};
+
+/*
+ * Bit flags passed back from multsub() to paramsubst().
+ * Some flags go from a nested paramsubst() through the enclosing
+ * stringsubst() and prefork().
+ */
+enum {
+    /*
+     * Set if the string had whitespace at the start
+     * that should cause word splitting against any preceding string.
+     */
+    MULTSUB_WS_AT_START = 1,
+    /*
+     * Set if the string had whitespace at the end
+     * that should cause word splitting against any following string.
+     */
+    MULTSUB_WS_AT_END   = 2,
+    /*
+     * Set by nested paramsubst() to indicate the return
+     * value is a parameter name, rather than a value.
+     */
+    MULTSUB_PARAM_NAME  = 4
+};
 
 /*
  * Structure for adding parameters in a module.



Messages sorted by: Reverse Date, Date, Thread, Author