Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Another ${(z)param} buglet



On Mon, 13 Dec 2010 09:35:12 -0800
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> Let's go ahead and use up Z for this so as not to tangle up z with a
> new more restricted delimiter syntax.  Further let's immediately
> reserve a character (maybe "+") to have no meaning of its own, but
> instead to always introduce a delimited string into which we can put 
> new options.  E.g. (C) and (+(C)) could have distinct meanings.
> That gets us back the entire alphabet for new things we want to
> throw into (+:...:), and permits the possibility of nesting (which
> I hope we'd never need) such as (+{+{...}}).

Can't see why that shouldn't work.

Here's a patch that still uses the previous delimiters.  Changing them
later shouldn't be any harder than before: the option string is parsed
the same way, only the result is now used more cleanly.

It tidies up the zleparse nonsense into a variable, lexflags, that
contains bit flags for this stuff.  I discovered one place in
zle_tricky.c (getcurcmd()) where zleparse was being set to 2, predating
this set of patches; however, zleparse was only ever tested and set as a
boolean elsewhere (as long ago as 4.2), so that must be cruft now, and
the patch simply sets LEXFLAGS_ACTIVE there.

This allows me to introduce a new flag, n, so that I can finally get my
lines split on all whitespace including newlines without ';' characters
appearing.

lexsave()/lexrestore() should probably save and restore lexflags; I
haven't done that yet.  Once they do, we won't need to do it by hand in
bufferwords() (which already does it cleanly, but goodness knows what
ZLE does).

Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.127
diff -p -u -r1.127 expn.yo
--- Doc/Zsh/expn.yo	12 Dec 2010 22:44:51 -0000	1.127
+++ Doc/Zsh/expn.yo	13 Dec 2010 18:00:21 -0000
@@ -1009,14 +1009,17 @@ find the words, i.e. taking into account
 Comments are not treated specially but as ordinary strings, similar
 to interactive shells with the tt(INTERACTIVE_COMMENTS) option unset.
 
-The flag can take option letters between a following pair of
-`tt(PLUS())' characters.  tt(LPAR()z+PLUS()c+PLUS()RPAR()) causes
-comments to be parsed as a string and retained; any field in the
+The flag can take a combination of option letters between a following
+pair of `tt(PLUS())' characters.  tt(LPAR()z+PLUS()c+PLUS()RPAR())
+causes comments to be parsed as a string and retained; any field in the
 resulting array beginning with an unquoted comment character is a
 comment.  tt(LPAR()z+PLUS()C+PLUS()RPAR()) causes comments to be parsed
 and removed.  The rule for comments is standard: anything between a word
 starting with the third charcter of tt($HISTCHARS), default tt(#), up to
-the next newline is a comment.
+the next newline is a comment.  tt(LPAR()z+PLUS()n+PLUS()RPAR()) causes
+unquoted newlines to be treated as ordinary whitespace, else they are
+treated as if they are shell code delimiters and converted to
+semicolons.
 
 Note that this is done very late, as for the `tt((s))' flag. So to
 access single words in the result, one has to use nested expansions as 
Index: Src/hist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/hist.c,v
retrieving revision 1.107
diff -p -u -r1.107 hist.c
--- Src/hist.c	12 Dec 2010 22:44:51 -0000	1.107
+++ Src/hist.c	13 Dec 2010 18:00:21 -0000
@@ -2345,7 +2345,8 @@ readhistfile(char *fn, int err, int read
 		/*
 		 * Attempt to do this using the lexer.
 		 */
-		LinkList wordlist = bufferwords(NULL, pt, NULL, 1);
+		LinkList wordlist = bufferwords(NULL, pt, NULL,
+						LEXFLAGS_COMMENTS_KEEP);
 		LinkNode wordnode;
 		int nwords_max;
 		nwords_max = 2 * countlinknodes(wordlist);
@@ -2905,10 +2906,10 @@ histfileIsLocked(void)
 
 /**/
 mod_export LinkList
-bufferwords(LinkList list, char *buf, int *index, int comments)
+bufferwords(LinkList list, char *buf, int *index, int flags)
 {
     int num = 0, cur = -1, got = 0, ne = noerrs;
-    int owb = wb, owe = we, oadx = addedx, ozp = zleparse, onc = nocomments;
+    int owb = wb, owe = we, oadx = addedx, ozp = lexflags, onc = nocomments;
     int ona = noaliases, ocs = zlemetacs, oll = zlemetall;
     int forloop = 0, rcquotes = opts[RCQUOTES];
     char *p, *addedspaceptr;
@@ -2925,6 +2926,12 @@ bufferwords(LinkList list, char *buf, in
     addedx = 0;
     noerrs = 1;
     lexsave();
+    lexflags = flags | LEXFLAGS_ACTIVE;
+    /*
+     * Are we handling comments?
+     */
+    nocomments = !(flags & (LEXFLAGS_COMMENTS_KEEP|
+			    LEXFLAGS_COMMENTS_STRIP));
     if (buf) {
 	int l = strlen(buf);
 
@@ -2943,18 +2950,10 @@ bufferwords(LinkList list, char *buf, in
 	inpush(p, 0, NULL);
 	zlemetall = strlen(p) ;
 	zlemetacs = zlemetall + 1;
-
-	/*
-	 * If comments is non-zero we are handling comments.
-	 * zleparse indicates the mode to the lexer.
-	 */
-	zleparse = 1 + comments;
-	nocomments = !comments;
     } else {
 	int ll, cs;
 	char *linein;
 
-	zleparse = 1;
 	linein = zleentry(ZLE_CMD_GET_LINE, &ll, &cs);
 	zlemetall = ll + 1; /* length of line plus space added below */
 	zlemetacs = cs;
@@ -3096,7 +3095,7 @@ bufferwords(LinkList list, char *buf, in
 	    }
 	    forloop--;
 	}
-	if (!got && !zleparse) {
+	if (!got && !lexflags) {
 	    got = 1;
 	    cur = num - 1;
 	}
@@ -3121,7 +3120,7 @@ bufferwords(LinkList list, char *buf, in
     strinend();
     inpop();
     errflag = 0;
-    zleparse = ozp;
+    lexflags = ozp;
     nocomments = onc;
     noerrs = ne;
     lexrestore();
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.59
diff -p -u -r1.59 lex.c
--- Src/lex.c	13 Dec 2010 11:20:47 -0000	1.59
+++ Src/lex.c	13 Dec 2010 18:00:21 -0000
@@ -117,15 +117,11 @@ mod_export int wb, we;
 mod_export int noaliases;
 
 /*
- * we are parsing a line sent to use by the editor, or some other string
- * that's not part of standard command input (e.g. eval is part of
- * normal command input).
+ * If non-zero, we are parsing a line sent to use by the editor, or some
+ * other string that's not part of standard command input (e.g. eval is
+ * part of normal command input).
  *
- * zleparse = 1 is the normal case.
- * zleparse = 2 is used for word splitting; the difference is we
- *              preserve comments.
- * zleparse = 3 is also for word splitting, here handling comments
- *              but stripping them.
+ * Set of bits from LEXFLAGS_*.
  *
  * Note that although it is passed into the lexer as an input, the
  * lexer can set it to zero after finding the word it's searching for.
@@ -134,7 +130,7 @@ mod_export int noaliases;
  */
 
 /**/
-mod_export int zleparse;
+mod_export int lexflags;
 
 /**/
 mod_export int wordbeg;
@@ -429,7 +425,7 @@ zshlex(void)
 	isnewlin = 0;
     else
 	isnewlin = (inbufct) ? -1 : 1;
-    if (tok == SEMI || tok == NEWLIN)
+    if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
 	tok = SEPER;
 }
 
@@ -588,9 +584,9 @@ add(int c)
     }
 }
 
-#define SETPARBEGIN {if (zleparse && !(inbufflags & INP_ALIAS) && zlemetacs >= zlemetall+1-inbufct) parbegin = inbufct;}
+#define SETPARBEGIN {if (lexflags && !(inbufflags & INP_ALIAS) && zlemetacs >= zlemetall+1-inbufct) parbegin = inbufct;}
 #define SETPAREND {\
-	    if (zleparse && !(inbufflags & INP_ALIAS) && parbegin != -1 && parend == -1) {\
+	    if (lexflags && !(inbufflags & INP_ALIAS) && parbegin != -1 && parend == -1) {\
 		if (zlemetacs >= zlemetall + 1 - inbufct)\
 		    parbegin = -1;\
 		else\
@@ -760,22 +756,17 @@ gettok(void)
 
     /*
      * Handle comments.  There are some special cases when this
-     * is not normal command input: zleparse implies we are examining
+     * is not normal command input: lexflags implies we are examining
      * a line lexically without it being used for normal command input.
-     * If zleparse is 1 we treat comments as normal for interactive
-     * mode.
-     * If zleparse is 2 (which has actually got nothing to do with zle)
-     * we always handle comments and retain them.
-     * If zleparse is 3 we always handle comments and discard them.
      */
     if (c == hashchar && !nocomments &&
 	(isset(INTERACTIVECOMMENTS) ||
-	 ((zleparse != 1) && !expanding &&
+	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
 	  (!interact || unset(SHINSTDIN) || strin)))) {
 	/* History is handled here to prevent extra  *
 	 * newlines being inserted into the history. */
 
-	if (zleparse == 2) {
+	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
 	    len = 0;
 	    bptr = tokstr = (char *)hcalloc(bsiz = 32);
 	    add(c);
@@ -783,14 +774,14 @@ gettok(void)
 	while ((c = ingetc()) != '\n' && !lexstop) {
 	    hwaddc(c);
 	    addtoline(c);
-	    if (zleparse == 2)
+	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
 		add(c);
 	}
 
 	if (errflag)
 	    peek = LEXERR;
 	else {
-	    if (zleparse == 2) {
+	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
 		*bptr = '\0';
 		if (!lexstop)
 		    hungetc(c);
@@ -805,7 +796,7 @@ gettok(void)
 		 * we don't want a newline token since it's
 		 * treated specially.
 		 */
-		if (zleparse == 3 && lexstop)
+		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
 		    peek = ENDINPUT;
 		else
 		    peek = NEWLIN;
@@ -1778,7 +1769,7 @@ gotword(void)
     we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
     if (zlemetacs <= we) {
 	wb = zlemetall - wordbeg + addedx;
-	zleparse = 0;
+	lexflags = 0;
     }
 }
 
@@ -1813,11 +1804,11 @@ exalias(void)
 	} else
 	    zshlextext = tokstr;
 
-	if (zleparse && !(inbufflags & INP_ALIAS)) {
-	    int zp = zleparse;
+	if (lexflags && !(inbufflags & INP_ALIAS)) {
+	    int zp = lexflags;
 
 	    gotword();
-	    if (zp == 1 && !zleparse) {
+	    if (zp == 1 && !lexflags) {
 		if (zshlextext == copy)
 		    zshlextext = tokstr;
 		return 0;
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.113
diff -p -u -r1.113 subst.c
--- Src/subst.c	12 Dec 2010 22:44:51 -0000	1.113
+++ Src/subst.c	13 Dec 2010 18:00:21 -0000
@@ -1557,9 +1557,7 @@ paramsubst(LinkList l, LinkNode n, char 
      * spbreak, see above; fairly straighforward in use but c.f.
      * the comment for mods.
      *
-     * This ultimately becomes zleparse during lexical analysis, via
-     * the comments argument to bufferwords(). It's got nothing
-     * to do with zle.
+     * This gets set to a combination of the LEXFLAGS_* bits.
      */
     int shsplit = 0;
     /*
@@ -1937,19 +1935,24 @@ paramsubst(LinkList l, LinkNode n, char 
 		    break;
 
 		case 'z':
-		    shsplit = 1;
+		    shsplit = LEXFLAGS_ACTIVE;
 		    if (s[1] == '+') {
 			s += 2;
 			while (*s && *s != '+' && *s != ')' && *s != Outpar) {
 			    switch (*s++) {
 			    case 'c':
 				/* Parse and keep comments */
-				shsplit = 2;
+				shsplit |= LEXFLAGS_COMMENTS_KEEP;
 				break;
 
 			    case 'C':
 				/* Parse and remove comments */
-				shsplit = 3;
+				shsplit |= LEXFLAGS_COMMENTS_STRIP;
+				break;
+
+			    case 'n':
+				/* Treat newlines as whitespace */
+				shsplit |= LEXFLAGS_NEWLINE;
 				break;
 
 			    default:
@@ -3232,10 +3235,10 @@ paramsubst(LinkList l, LinkNode n, char 
 	if (isarr) {
 	    char **ap;
 	    for (ap = aval; *ap; ap++)
-		list = bufferwords(list, *ap, NULL, shsplit-1);
+		list = bufferwords(list, *ap, NULL, shsplit);
 	    isarr = 0;
 	} else
-	    list = bufferwords(NULL, val, NULL, shsplit-1);
+	    list = bufferwords(NULL, val, NULL, shsplit);
 
 	if (!list || !firstnode(list))
 	    val = dupstring("");
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.170
diff -p -u -r1.170 zsh.h
--- Src/zsh.h	22 Nov 2010 11:42:47 -0000	1.170
+++ Src/zsh.h	13 Dec 2010 18:00:21 -0000
@@ -1823,6 +1823,34 @@ struct histent {
 #define HFILE_NO_REWRITE	0x0020
 #define HFILE_USE_OPTIONS	0x8000
 
+/*
+ * Flags argument to bufferwords() used
+ * also by lexflags variable.
+ */
+/*
+ * Kick the lexer into special string-analysis
+ * mode without parsing.  Any bit set in
+ * the flags has this effect, but this
+ * has otherwise all the default effects.
+ */
+#define LEXFLAGS_ACTIVE		0x0001
+/*
+ * Parse comments and treat each comment as a single string
+ */
+#define LEXFLAGS_COMMENTS_KEEP	0x0002
+/*
+ * Parse comments and strip them.
+ */
+#define LEXFLAGS_COMMENTS_STRIP	0x0004
+/*
+ * Either of the above
+ */
+#define LEXFLAGS_COMMENTS (LEXFLAGS_COMMENTS_KEEP|LEXFLAGS_COMMENTS_STRIP)
+/*
+ * Treat newlines as whitespace
+ */
+#define LEXFLAGS_NEWLINE	0x0008
+
 /******************************************/
 /* Definitions for programable completion */
 /******************************************/
Index: Src/Zle/compcore.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/compcore.c,v
retrieving revision 1.102
diff -p -u -r1.102 compcore.c
--- Src/Zle/compcore.c	6 Dec 2010 10:49:51 -0000	1.102
+++ Src/Zle/compcore.c	13 Dec 2010 18:00:21 -0000
@@ -1481,7 +1481,7 @@ set_comp_sep(void)
 
     /* Put the string in the lexer buffer and call the lexer to *
      * get the words we have to expand.                        */
-    zleparse = 1;
+    lexflags = LEXFLAGS_ACTIVE;
     ocs = zlemetacs;
     oll = zlemetall;
     ol = zlemetaline;
@@ -1616,7 +1616,7 @@ set_comp_sep(void)
         }
 	else
 	    p = NULL;
-	if (!got && !zleparse) {
+	if (!got && !lexflags) {
 	    DPUTS(!p, "no current word in substr");
 	    got = 1;
 	    cur = i;
@@ -1634,7 +1634,7 @@ set_comp_sep(void)
     noaliases = ona;
     strinend();
     inpop();
-    errflag = zleparse = 0;
+    errflag = lexflags = 0;
     noerrs = ne;
     lexrestore();
     wb = owb;
Index: Src/Zle/compctl.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/compctl.c,v
retrieving revision 1.38
diff -p -u -r1.38 compctl.c
--- Src/Zle/compctl.c	11 Nov 2008 19:36:34 -0000	1.38
+++ Src/Zle/compctl.c	13 Dec 2010 18:00:21 -0000
@@ -2789,7 +2789,7 @@ sep_comp_string(char *ss, char *s, int n
 
     /* Put the string in the lexer buffer and call the lexer to *
      * get the words we have to expand.                        */
-    zleparse = 1;
+    lexflags = LEXFLAGS_ACTIVE;
     addedx = 1;
     noerrs = 1;
     lexsave();
@@ -2828,7 +2828,7 @@ sep_comp_string(char *ss, char *s, int n
 	    addlinknode(foo, (p = ztrdup(tokstr)));
 	else
 	    p = NULL;
-	if (!got && !zleparse) {
+	if (!got && !lexflags) {
 	    DPUTS(!p, "no current word in substr");
 	    got = 1;
 	    cur = i;
@@ -2843,7 +2843,7 @@ sep_comp_string(char *ss, char *s, int n
     noaliases = ona;
     strinend();
     inpop();
-    errflag = zleparse = 0;
+    errflag = lexflags = 0;
     noerrs = ne;
     lexrestore();
     wb = owb;
@@ -3703,7 +3703,7 @@ makecomplistflags(Compctl cc, char *s, i
 
 	/* Put the string in the lexer buffer and call the lexer to *
 	 * get the words we have to expand.                        */
-	zleparse = 1;
+	lexflags = LEXFLAGS_ACTIVE;
 	lexsave();
 	tmpbuf = (char *)zhalloc(strlen(cc->str) + 5);
 	sprintf(tmpbuf, "foo %s", cc->str); /* KLUDGE! */
@@ -3721,7 +3721,7 @@ makecomplistflags(Compctl cc, char *s, i
 	noaliases = ona;
 	strinend();
 	inpop();
-	errflag = zleparse = 0;
+	errflag = lexflags = 0;
 	lexrestore();
 	/* Fine, now do full expansion. */
 	prefork(foo, 0);
Index: Src/Zle/zle_tricky.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_tricky.c,v
retrieving revision 1.102
diff -p -u -r1.102 zle_tricky.c
--- Src/Zle/zle_tricky.c	22 Mar 2010 16:26:00 -0000	1.102
+++ Src/Zle/zle_tricky.c	13 Dec 2010 18:00:21 -0000
@@ -1140,7 +1140,7 @@ get_comp_string(void)
     zsfree(varname);
     varname = NULL;
     insubscr = 0;
-    zleparse = 1;
+    lexflags = LEXFLAGS_ACTIVE;
     clwpos = -1;
     lexsave();
     inpush(dupstrspace(linptr), 0, NULL);
@@ -1244,7 +1244,7 @@ get_comp_string(void)
 	    if (wordpos != redirpos)
 		wordpos = redirpos = 0;
 	}
-	if (!zleparse && !tt0) {
+	if (!lexflags && !tt0) {
 	    /* This is done when the lexer reached the word the cursor is on. */
 	    tt = tokstr ? dupstring(tokstr) : NULL;
 
@@ -1345,7 +1345,7 @@ get_comp_string(void)
 				 (sl - 1) : (zlemetacs_qsub - wb)]);
 	}
     } while (tok != LEXERR && tok != ENDINPUT &&
-	     (tok != SEPER || (zleparse && !tt0)));
+	     (tok != SEPER || (lexflags && !tt0)));
     /* Calculate the number of words stored in the clwords array. */
     clwnum = (tt || !wordpos) ? wordpos : wordpos - 1;
     zsfree(clwords[clwnum]);
@@ -1360,7 +1360,7 @@ get_comp_string(void)
     }
     strinend();
     inpop();
-    errflag = zleparse = 0;
+    errflag = lexflags = 0;
     if (parbegin != -1) {
 	/* We are in command or process substitution if we are not in
 	 * a $((...)). */
@@ -2707,7 +2707,7 @@ doexpandhist(void)
     noaliases = ona;
     strinend();
     inpop();
-    zleparse = 0;
+    lexflags = 0;
     lexrestore();
     expanding = 0;
 
@@ -2807,7 +2807,7 @@ getcurcmd(void)
     int curlincmd;
     char *s = NULL;
 
-    zleparse = 2;
+    lexflags = LEXFLAGS_ACTIVE;
     lexsave();
     metafy_line();
     inpush(dupstrspace(zlemetaline), 0, NULL);
@@ -2825,11 +2825,11 @@ getcurcmd(void)
 	    cmdwe = zlemetall + 1 - inbufct;
 	}
     }
-    while (tok != ENDINPUT && tok != LEXERR && zleparse);
+    while (tok != ENDINPUT && tok != LEXERR && lexflags);
     popheap();
     strinend();
     inpop();
-    errflag = zleparse = 0;
+    errflag = lexflags = 0;
     unmetafy_line();
     lexrestore();
 
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.49
diff -p -u -r1.49 D04parameter.ztst
--- Test/D04parameter.ztst	13 Dec 2010 11:20:47 -0000	1.49
+++ Test/D04parameter.ztst	13 Dec 2010 18:00:21 -0000
@@ -462,6 +462,17 @@
 >with
 >comment
 
+  line=$'echo one\necho two # with a comment\necho three'
+  print -l ${(z+nc+)line}
+0:Treating zplit newlines as ordinary whitespace
+>echo
+>one
+>echo
+>two
+># with a comment
+>echo
+>three
+
   psvar=(dog)
   setopt promptsubst
   foo='It shouldn'\''t $(happen) to a %1v.'

-- 
Peter Stephenson <pws@xxxxxxx>            Software Engineer
Tel: +44 (0)1223 692070                   Cambridge Silicon Radio Limited
Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom



Messages sorted by: Reverse Date, Date, Thread, Author