Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: empty here-document bug in zsh 4.3.10



On Mon, 13 Sep 2010 18:13:19 +0000 (UTC)
Ralf Wildenhues <Ralf.Wildenhues@xxxxxx> wrote:
> Hello zsh maintainers,
> 
> cat >file <<EOF
> EOF
> 
> produces an empty file with all Bourne-descending shells I know of,
> except for zsh (even in 'emulate sh' mode) which puts a single newline
> in the file.  Spotted on FreeBSD.

This is an ancient problem based on the fact the we turn here-documents
internally into here-strings, and here-strings are treated as having an
implicit newline at the end.  here-documents shouldn't be, but we fudge the
issue by stripping any newline and adding it back, so if there really
wasn't one we're in trouble in this one case.

We already have enough information to fix the basic problem fairly easily.
The real nightmare is the knock on effect that if you output a function
definition with a here document it comes back as a here string, and then
you're stuck with the limitations of here strings.

The nice fix is to output real here documents from function defintions etc.
That takes a bit more work but is probably worth doing.

What I haven't done is add back tabs stripped with the <<-HERE syntax.

Index: Src/exec.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/exec.c,v
retrieving revision 1.183
diff -p -u -r1.183 exec.c
--- Src/exec.c	31 Aug 2010 19:32:57 -0000	1.183
+++ Src/exec.c	14 Sep 2010 12:56:30 -0000
@@ -3449,11 +3449,12 @@ closem(int how)
 
 /**/
 char *
-gethere(char *str, int typ)
+gethere(char **strp, int typ)
 {
     char *buf;
     int bsiz, qt = 0, strip = 0;
     char *s, *t, *bptr, c;
+    char *str = *strp;
 
     for (s = str; *s; s++)
 	if (inull(*s)) {
@@ -3467,6 +3468,7 @@ gethere(char *str, int typ)
 	while (*str == '\t')
 	    str++;
     }
+    *strp = str;
     bptr = buf = zalloc(bsiz = 256);
     for (;;) {
 	t = bptr;
@@ -3500,8 +3502,6 @@ gethere(char *str, int typ)
 	}
 	*bptr++ = '\n';
     }
-    if (t > buf && t[-1] == '\n')
-	t--;
     *t = '\0';
     if (!qt) {
 	int ef = errflag;
@@ -3529,7 +3529,15 @@ getherestr(struct redir *fn)
     singsub(&t);
     untokenize(t);
     unmetafy(t, &len);
-    t[len++] = '\n';
+    /*
+     * For real here-strings we append a newline, as if the
+     * string given was a complete command line.
+     *
+     * For here-strings from here documents, we use the original
+     * text exactly.
+     */
+    if (!(fn->flags & REDIRF_FROM_HEREDOC))
+	t[len++] = '\n';
     if ((fd = gettempfile(NULL, 1, &s)) < 0)
 	return -1;
     write_loop(fd, t, len);
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.55
diff -p -u -r1.55 lex.c
--- Src/lex.c	28 Apr 2010 08:55:35 -0000	1.55
+++ Src/lex.c	14 Sep 2010 12:56:30 -0000
@@ -384,16 +384,17 @@ zshlex(void)
     if (tok == NEWLIN || tok == ENDINPUT) {
 	while (hdocs) {
 	    struct heredocs *next = hdocs->next;
-	    char *name;
+	    char *doc, *munged_term;
 
 	    hwbegin(0);
 	    cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
+	    munged_term = dupstring(hdocs->str);
 	    STOPHIST
-	    name = gethere(hdocs->str, hdocs->type);
+	    doc = gethere(&munged_term, hdocs->type);
 	    ALLOWHIST
 	    cmdpop();
 	    hwend();
-	    if (!name) {
+	    if (!doc) {
 		zerr("here document too large");
 		while (hdocs) {
 		    next = hdocs->next;
@@ -403,7 +404,8 @@ zshlex(void)
 		tok = LEXERR;
 		break;
 	    }
-	    setheredoc(hdocs->pc, REDIR_HERESTR, name);
+	    setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
+		       munged_term);
 	    zfree(hdocs, sizeof(struct heredocs));
 	    hdocs = next;
 	}
Index: Src/parse.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/parse.c,v
retrieving revision 1.83
diff -p -u -r1.83 parse.c
--- Src/parse.c	16 Dec 2009 18:39:07 -0000	1.83
+++ Src/parse.c	14 Sep 2010 12:56:30 -0000
@@ -1813,13 +1813,17 @@ par_redir(int *rp, char *idstring)
 	struct heredocs **hd;
 	int htype = type;
 
+	/*
+	 * Add two here for the string to remember the HERE
+	 * terminator in raw and munged form.
+	 */
 	if (idstring)
 	{
 	    type |= REDIR_VARID_MASK;
-	    ncodes = 4;
+	    ncodes = 6;
 	}
 	else
-	    ncodes = 3;
+	    ncodes = 5;
 
 	/* If we ever to change the number of codes, we have to change
 	 * the definition of WC_REDIR_WORDS. */
@@ -1828,10 +1832,16 @@ par_redir(int *rp, char *idstring)
 	ecbuf[r] = WCB_REDIR(type);
 	ecbuf[r + 1] = fd1;
 
+	/*
+	 * r + 2: the HERE string we recover
+	 * r + 3: the HERE document terminator, raw
+	 * r + 4: the HERE document terminator, munged
+	 */
 	if (idstring)
-	    ecbuf[r + 3] = ecstrcode(idstring);
+	    ecbuf[r + 5] = ecstrcode(idstring);
 
-	for (hd = &hdocs; *hd; hd = &(*hd)->next);
+	for (hd = &hdocs; *hd; hd = &(*hd)->next)
+	    ;
 	*hd = zalloc(sizeof(struct heredocs));
 	(*hd)->next = NULL;
 	(*hd)->type = htype;
@@ -1887,10 +1897,12 @@ par_redir(int *rp, char *idstring)
 
 /**/
 void
-setheredoc(int pc, int type, char *str)
+setheredoc(int pc, int type, char *str, char *termstr, char *munged_termstr)
 {
     ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);
     ecbuf[pc + 2] = ecstrcode(str);
+    ecbuf[pc + 3] = ecstrcode(termstr);
+    ecbuf[pc + 4] = ecstrcode(munged_termstr);
 }
 
 /*
@@ -2439,10 +2451,15 @@ ecgetredirs(Estate s)
 	r->type = WC_REDIR_TYPE(code);
 	r->fd1 = *s->pc++;
 	r->name = ecgetstr(s, EC_DUP, NULL);
-	if (WC_REDIR_FROM_HEREDOC(code))
+	if (WC_REDIR_FROM_HEREDOC(code)) {
 	    r->flags = REDIRF_FROM_HEREDOC;
-	else
+	    r->here_terminator = ecgetstr(s, EC_DUP, NULL);
+	    r->munged_here_terminator = ecgetstr(s, EC_DUP, NULL);
+	} else {
 	    r->flags = 0;
+	    r->here_terminator = NULL;
+	    r->munged_here_terminator = NULL;
+	}
 	if (WC_REDIR_VARID(code))
 	    r->varid = ecgetstr(s, EC_DUP, NULL);
 	else
Index: Src/text.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/text.c,v
retrieving revision 1.25
diff -p -u -r1.25 text.c
--- Src/text.c	13 Feb 2010 20:28:36 -0000	1.25
+++ Src/text.c	14 Sep 2010 12:56:30 -0000
@@ -30,7 +30,7 @@
 #include "zsh.mdh"
 #include "text.pro"
 
-static char *tptr, *tbuf, *tlim;
+static char *tptr, *tbuf, *tlim, *tpending;
 static int tsiz, tindent, tnewlins, tjob;
 
 static void
@@ -41,6 +41,53 @@ dec_tindent(void)
 	tindent--;
 }
 
+/*
+ * Add a pair of pending strings and a newline.
+ * This is used for here documents.  It will be output when
+ * we have a lexically significant newline.
+ *
+ * This isn't that common and a multiple use on the same line is *very*
+ * uncommon; we don't try to optimise it.
+ *
+ * This is not used for job text; there we bear the inaccuracy
+ * of turning this into a here-string.
+ */
+static void
+taddpending(char *str1, char *str2)
+{
+    int len = strlen(str1) + strlen(str2) + 1;
+
+    /*
+     * We don't strip newlines from here-documents converted
+     * to here-strings, so no munging is required except to
+     * add a newline after the here-document terminator.
+     * However, because the job text doesn't automatically
+     * have a newline right at the end, we handle that
+     * specially.
+     */
+    if (tpending) {
+	int oldlen = strlen(tpending);
+	tpending = realloc(tpending, len + oldlen);
+	sprintf(tpending + oldlen, "%s%s", str1, str2);
+    } else {
+	tpending = (char *)zalloc(len);
+	sprintf(tpending, "%s%s", str1, str2);
+    }
+}
+
+/* Output the pending string where appropriate */
+
+static void
+tdopending(void)
+{
+    if (tpending) {
+	taddchr('\n');
+	taddstr(tpending);
+	zsfree(tpending);
+	tpending = NULL;
+    }
+}
+
 /* add a character to the text buffer */
 
 /**/
@@ -107,6 +154,7 @@ taddnl(int no_semicolon)
     int t0;
 
     if (tnewlins) {
+	tdopending();
 	taddchr('\n');
 	for (t0 = 0; t0 != tindent; t0++)
 	    taddchr('\t');
@@ -253,7 +301,7 @@ gettext2(Estate state)
     while (1) {
 	if (stack) {
 	    if (!(s = tstack))
-		return;
+		break;
 	    if (s->pop) {
 		tstack = s->prev;
 		s->prev = tfree;
@@ -795,6 +843,7 @@ gettext2(Estate state)
 	    return;
 	}
     }
+    tdopending();
 }
 
 /**/
@@ -833,27 +882,53 @@ getredirs(LinkList redirs)
 		taddchr('}');
 	    } else if (f->fd1 != (IS_READFD(f->type) ? 0 : 1))
 		taddchr('0' + f->fd1);
-	    taddstr(fstr[f->type]);
-	    if (f->type != REDIR_MERGEIN && f->type != REDIR_MERGEOUT)
-		taddchr(' ');
 	    if (f->type == REDIR_HERESTR &&
 		(f->flags & REDIRF_FROM_HEREDOC)) {
-		/*
-		 * Strings that came from here-documents are converted
-		 * to here strings without quotation, so add that
-		 * now.  If tokens are present we need to do double quoting.
-		 */
-		if (!has_token(f->name)) {
-		    taddchr('\'');
-		    taddstr(quotestring(f->name, NULL, QT_SINGLE));
-		    taddchr('\'');
+		if (tnewlins) {
+		    /*
+		     * Strings that came from here-documents are converted
+		     * to here strings without quotation, so convert them
+		     * back.
+		     */
+		    taddstr(fstr[REDIR_HEREDOC]);
+		    taddstr(f->here_terminator);
+		    taddpending(f->name, f->munged_here_terminator);
 		} else {
-		    taddchr('"');
-		    taddstr(quotestring(f->name, NULL, QT_DOUBLE));
-		    taddchr('"');
+		    taddstr(fstr[REDIR_HERESTR]);
+		    /*
+		     * Just a quick and dirty representation.
+		     * Remove a terminating newline, if any.
+		     */
+		    int fnamelen = strlen(f->name);
+		    int sav;
+		    if (fnamelen > 0 && f->name[fnamelen-1] == '\n') {
+			sav = 1;
+			f->name[fnamelen-1] = '\0';
+		    } else
+			sav = 0;
+		    /*
+		     * Strings that came from here-documents are converted
+		     * to here strings without quotation, so add that
+		     * now.  If tokens are present we need to do double quoting.
+		     */
+		    if (!has_token(f->name)) {
+			taddchr('\'');
+			taddstr(quotestring(f->name, NULL, QT_SINGLE));
+			taddchr('\'');
+		    } else {
+			taddchr('"');
+			taddstr(quotestring(f->name, NULL, QT_DOUBLE));
+			taddchr('"');
+		    }
+		    if (sav)
+			f->name[fnamelen-1] = '\n';
 		}
-	    } else
+	    } else {
+		taddstr(fstr[f->type]);
+		if (f->type != REDIR_MERGEIN && f->type != REDIR_MERGEOUT)
+		    taddchr(' ');
 		taddstr(f->name);
+	    }
 	    taddchr(' ');
 	    break;
 #ifdef DEBUG
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.167
diff -p -u -r1.167 zsh.h
--- Src/zsh.h	12 Sep 2010 18:56:41 -0000	1.167
+++ Src/zsh.h	14 Sep 2010 12:56:30 -0000
@@ -597,6 +597,8 @@ struct redir {
     int fd1, fd2;
     char *name;
     char *varid;
+    char *here_terminator;
+    char *munged_here_terminator;
 };
 
 /* The number of fds space is allocated for  *
@@ -787,7 +789,9 @@ struct eccstr {
 #define WC_REDIR_FROM_HEREDOC(C) ((int)(wc_data(C) & REDIR_FROM_HEREDOC_MASK))
 #define WCB_REDIR(T)        wc_bld(WC_REDIR, (T))
 /* Size of redir is 4 words if REDIR_VARID_MASK is set, else 3 */
-#define WC_REDIR_WORDS(C)   (WC_REDIR_VARID(C) ? 4 : 3)
+#define WC_REDIR_WORDS(C)			\
+    ((WC_REDIR_VARID(C) ? 4 : 3) +		\
+     (WC_REDIR_FROM_HEREDOC(C) ? 2 : 0))
 
 #define WC_ASSIGN_TYPE(C)   (wc_data(C) & ((wordcode) 1))
 #define WC_ASSIGN_TYPE2(C)  ((wc_data(C) & ((wordcode) 2)) >> 1)
Index: Test/A04redirect.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/A04redirect.ztst,v
retrieving revision 1.14
diff -p -u -r1.14 A04redirect.ztst
--- Test/A04redirect.ztst	12 Nov 2008 10:55:18 -0000	1.14
+++ Test/A04redirect.ztst	14 Sep 2010 12:56:30 -0000
@@ -134,6 +134,10 @@
 >    $foo$foo met celeste  'but with extra'  "stuff to test quoting"
 >Last line
 
+  read -r line <<'  HERE'
+  HERE
+1:No input, not even newline, from empty here document.
+
   #
   # exec tests: perform these in subshells so if they fail the
   # shell won't exit.

-- 
Peter Stephenson <pws@xxxxxxx>            Software Engineer
Tel: +44 (0)1223 692070                   Cambridge Silicon Radio Limited
Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom



Messages sorted by: Reverse Date, Date, Thread, Author