Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: parse error in process substitution



On Mon, 10 Nov 2008 09:21:52 +0100
Louis-David Mitterrand <vindex+lists-zsh-users@xxxxxxxxxxx> wrote:
> So how would you convert that working bash command to zsh?
> 
> 	root-tail <(ssh root@xxxxxxxxxxxx tail -F /var/log/kern.log),red,

This fixes the syntax.  Unfortunately, I had to impose some limitations
to prevent existing stuff stopping working.  Obviously I'd like to
know about anything else this messes up.  I'm not worried about
the undocumented feature that <(...) forced a new word.

A non-initial = usually isn't handled specially, and raw parentheses
(i.e. without a disambiguating character in front) are somewhat
overworked in zsh, so I couldn't make =(...) special anywhere except at
the start of an argument.  However, you can now put other things after
it.

">" and "<" never used to need quoting when used inside parentheses or
parameter substitutions, so to keep this I've had to forbid the use of
<(...) and >(...) in such places.  (The comptest function falls over if
this isn't done.)  This isn't likely to be a big problem
in practice.  Because of the previous rule this isn't relevant to
=(...).

Parsing of the contents of these expressions is done rather more simply
than for $(...) expressions.  I don't really know if there's any mileage
in making parsing of process substitutions more similar (inside the
parentheses only, the context dependence will have to remain
different).

Index: README
===================================================================
RCS file: /cvsroot/zsh/zsh/README,v
retrieving revision 1.58
diff -u -r1.58 README
--- README	30 Oct 2008 12:18:54 -0000	1.58
+++ README	13 Nov 2008 20:46:40 -0000
@@ -69,6 +69,16 @@
 consistent with recent versions of other shells.  The option
 DEBUG_BEFORE_CMD can be unset to revert to the previous behaviour.
 
+Previously, process substitutions of the form =(...), <(...) and
+>(...) were only handled if they appeared as separate command arguments,
+although the latter two forms caused a new command argument to be
+started at that point.  Now all three may be followed by other strings,
+and the latter two may also be preceded by other strings.  None may
+occur inside parameter substitutions, or inside parentheses used for
+grouping of patterns, in order to avoid clashes with cases where
+tt(<) or tt(>) were not treated specially in previous versions of the
+shell.
+
 In previous versions of the shell it was possible to use index 0 in an
 array or string subscript to refer to the same element as index 1 if the
 option KSH_ARRAYS was not in effect.  This was a limited approximation to
Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.97
diff -u -r1.97 expn.yo
--- Doc/Zsh/expn.yo	26 Oct 2008 17:57:13 -0000	1.97
+++ Doc/Zsh/expn.yo	13 Nov 2008 20:46:41 -0000
@@ -353,11 +353,17 @@
 sect(Process Substitution)
 cindex(process substitution)
 cindex(substitution, process)
-Each command argument of the form
+Each part of a command argument that takes the form
 `tt(<LPAR())var(list)tt(RPAR())',
 `tt(>LPAR())var(list)tt(RPAR())' or
 `tt(=LPAR())var(list)tt(RPAR())'
-is subject to process substitution.
+is subject to process substitution.  The expression may be preceded
+or followed by other strings except that, to prevent clashes with
+commonly occurring strings and patterns, the last
+form must occur at the start of a command argument, and none of
+the forms may occur inside parentheses used for grouping of patterns or
+inside parameter substitutions.
+
 In the case of the tt(<) or tt(>) forms, the shell runs process
 var(list) asynchronously.  If the system supports the tt(/dev/fd)
 mechanism, the command argument is the name of the device file
Index: Src/exec.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/exec.c,v
retrieving revision 1.160
diff -u -r1.160 exec.c
--- Src/exec.c	10 Oct 2008 09:55:34 -0000	1.160
+++ Src/exec.c	13 Nov 2008 20:46:43 -0000
@@ -3560,7 +3560,7 @@
 
 /**/
 static Eprog
-parsecmd(char *cmd)
+parsecmd(char *cmd, char **eptr)
 {
     char *str;
     Eprog prog;
@@ -3571,7 +3571,9 @@
 	return NULL;
     }
     *str = '\0';
-    if (str[1] || !(prog = parse_string(cmd + 2, 0))) {
+    if (eptr)
+	*eptr = str+1;
+    if (!(prog = parse_string(cmd + 2, 0))) {
 	zerr("parse error in process substitution");
 	return NULL;
     }
@@ -3582,7 +3584,7 @@
 
 /**/
 char *
-getoutputfile(char *cmd)
+getoutputfile(char *cmd, char **eptr)
 {
     pid_t pid;
     char *nam;
@@ -3592,7 +3594,7 @@
 
     if (thisjob == -1)
 	return NULL;
-    if (!(prog = parsecmd(cmd)))
+    if (!(prog = parsecmd(cmd, eptr)))
 	return NULL;
     if (!(nam = gettempname(NULL, 0)))
 	return NULL;
@@ -3677,7 +3679,7 @@
 
 /**/
 char *
-getproc(char *cmd)
+getproc(char *cmd, char **eptr)
 {
 #if !defined(HAVE_FIFOS) && !defined(PATH_DEV_FD)
     zerr("doesn't look like your system supports FIFOs.");
@@ -3696,7 +3698,7 @@
 	return NULL;
     if (!(pnam = namedpipe()))
 	return NULL;
-    if (!(prog = parsecmd(cmd)))
+    if (!(prog = parsecmd(cmd, eptr)))
 	return NULL;
     if (!jobtab[thisjob].filelist)
 	jobtab[thisjob].filelist = znewlinklist();
@@ -3723,7 +3725,7 @@
     if (thisjob == -1)
 	return NULL;
     pnam = hcalloc(strlen(PATH_DEV_FD) + 6);
-    if (!(prog = parsecmd(cmd)))
+    if (!(prog = parsecmd(cmd, eptr)))
 	return NULL;
     mpipe(pipes);
     if ((pid = zfork(&bgtime))) {
@@ -3772,7 +3774,7 @@
     pid_t pid;
     struct timeval bgtime;
 
-    if (!(prog = parsecmd(cmd)))
+    if (!(prog = parsecmd(cmd, NULL)))
 	return -1;
     mpipe(pipes);
     if ((pid = zfork(&bgtime))) {
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.46
diff -u -r1.46 lex.c
--- Src/lex.c	31 Aug 2008 19:50:49 -0000	1.46
+++ Src/lex.c	13 Nov 2008 20:46:44 -0000
@@ -835,7 +835,7 @@
 	return OUTPAR;
     case LX1_INANG:
 	d = hgetc();
-	if (!incmdpos && d == '(') {
+	if (d == '(') {
 	    hungetc(d);
 	    lexstop = 0;
 	    unpeekfd:
@@ -1152,20 +1152,13 @@
 		c = Comma;
 	    break;
 	case LX2_OUTANG:
-	    if (!intpos) {
-		if (in_brace_param || sub)
-		    break;
-		else
-		    goto brk;
-	    }
+	    if (in_brace_param || sub)
+		break;
 	    e = hgetc();
 	    if (e != '(') {
 		hungetc(e);
 		lexstop = 0;
-		if (in_brace_param || sub)
-		    break;
-		else
-		    goto brk;
+		goto brk;
 	    }
 	    add(Outang);
 	    if (skipcomm()) {
@@ -1178,7 +1171,7 @@
 	    if (isset(SHGLOB) && sub)
 		break;
 	    e = hgetc();
-	    if(e == '(' && intpos) {
+	    if (!(in_brace_param || sub) && e == '(') {
 		add(Inang);
 		if (skipcomm()) {
 		    peek = LEXERR;
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.90
diff -u -r1.90 subst.c
--- Src/subst.c	30 Oct 2008 15:34:18 -0000	1.90
+++ Src/subst.c	13 Nov 2008 20:46:45 -0000
@@ -56,43 +56,27 @@
 
     queue_signals();
     for (node = firstnode(list); node; incnode(node)) {
-	char *str, c;
-
-	str = (char *)getdata(node);
-	if (((c = *str) == Inang || c == Outang || c == Equals) &&
-	    str[1] == Inpar) {
-	    if (c == Inang || c == Outang)
-		setdata(node, (void *) getproc(str));	/* <(...) or >(...) */
-	    else
-		setdata(node, (void *) getoutputfile(str));	/* =(...) */
-	    if (!getdata(node)) {
-		setdata(node, dupstring(""));
-		unqueue_signals();
-		return;
-	    }
-	} else {
-	    if (isset(SHFILEEXPANSION)) {
-		/*
-		 * Here and below we avoid taking the address
-		 * of a void * and then pretending it's a char **
-		 * instead of a void ** by a little inefficiency.
-		 * This could be avoided with some extra linked list
-		 * machinery, but that would need quite a lot of work
-		 * to ensure consistency.  What we really need is
-		 * templates...
-		 */
-		char *cptr = (char *)getdata(node);
-		filesub(&cptr, flags & (PF_TYPESET|PF_ASSIGN));
-		/*
-		 * The assignment is so simple it's not worth
-		 * testing if cptr changed...
-		 */
-		setdata(node, cptr);
-	    }
-	    if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
-		unqueue_signals();
-		return;
-	    }
+	if (isset(SHFILEEXPANSION)) {
+	    /*
+	     * Here and below we avoid taking the address
+	     * of a void * and then pretending it's a char **
+	     * instead of a void ** by a little inefficiency.
+	     * This could be avoided with some extra linked list
+	     * machinery, but that would need quite a lot of work
+	     * to ensure consistency.  What we really need is
+	     * templates...
+	     */
+	    char *cptr = (char *)getdata(node);
+	    filesub(&cptr, flags & (PF_TYPESET|PF_ASSIGN));
+	    /*
+	     * The assignment is so simple it's not worth
+	     * testing if cptr changed...
+	     */
+	    setdata(node, cptr);
+	}
+	if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
+	    unqueue_signals();
+	    return;
 	}
     }
     for (node = firstnode(list); node; incnode(node)) {
@@ -168,7 +152,37 @@
     char *str  = str3, c;
 
     while (!errflag && (c = *str)) {
-	if ((qt = c == Qstring) || c == String) {
+	if ((c == Inang || c == Outang || (str == str3 && c == Equals)) &&
+	    str[1] == Inpar) {
+	    char *subst, *rest, *snew, *sptr;
+	    int str3len = str - str3, sublen, restlen;
+
+	    if (c == Inang || c == Outang)
+		subst = getproc(str, &rest);	/* <(...) or >(...) */
+	    else
+		subst = getoutputfile(str, &rest);	/* =(...) */
+	    if (!subst)
+		subst = "";
+
+	    sublen = strlen(subst);
+	    restlen = strlen(rest);
+	    sptr = snew = hcalloc(str3len + sublen + restlen + 1);
+	    if (str3len) {
+		memcpy(sptr, str3, str3len);
+		sptr += str3len;
+	    }
+	    if (sublen) {
+		memcpy(sptr, subst, sublen);
+		sptr += sublen;
+	    }
+	    if (restlen)
+		memcpy(sptr, rest, restlen);
+	    sptr[restlen] = '\0';
+	    str3 = snew;
+	    str = snew + str3len + sublen;
+	    setdata(node, str3);
+	    continue;
+	} else if ((qt = c == Qstring) || c == String) {
 	    if ((c = str[1]) == Inpar) {
 		if (!qt)
 		    list->list.flags |= LF_ARRAY;
Index: Test/D03procsubst.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D03procsubst.ztst,v
retrieving revision 1.4
diff -u -r1.4 D03procsubst.ztst
--- Test/D03procsubst.ztst	14 Apr 2005 04:41:28 -0000	1.4
+++ Test/D03procsubst.ztst	13 Nov 2008 20:46:45 -0000
@@ -36,3 +36,51 @@
 0:FDs remain open for external commands called from functions
 >First
 >Zweite
+
+  catfield2() {
+    local -a args
+    args=(${(s.,.)1})
+    print $args[1]
+    cat $args[2]
+    print $args[3]
+  }
+  catfield2 up,<(print $'\x64'own),sideways
+0:<(...) when embedded within an argument
+>up
+>down
+>sideways
+
+  outputfield2() {
+    local -a args
+    args=(${(s.,.)1})
+    print $args[1]
+    echo 'How sweet the moonlight sits upon the bank' >$args[2]
+    print $args[3]
+  }
+  outputfield2 muddy,>(sed -e s/s/th/g >outputfield2.txt),vesture
+  # yuk
+  while [[ ! -e outputfield2.txt || ! -s outputfield2.txt ]]; do :; done
+  cat outputfield2.txt
+0:>(...) when embedded within an argument
+>muddy
+>vesture
+>How thweet the moonlight thitth upon the bank
+
+  catfield1() {
+    local -a args
+    args=(${(s.,.)1})
+    cat $args[1]
+    print $args[2]
+  }
+  catfield1 =(echo s$'\x69't),jessica
+0:=(...) followed by something else without a break
+>sit
+>jessica
+
+  (
+  setopt nonomatch
+  # er... why is this treated as a glob?
+  print everything,=(here is left),alone
+  )
+0:=(...) preceded by other stuff has no special effect
+>everything,=(here is left),alone


-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author