Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Multibyte output confuses "print -c" ?



On Sun, 10 Dec 2006 13:34:59 -0800
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> Comparing the output of "print -C 1" and "print -l" on that same expansion,
> there appears first to be a problem with columnation of strings containing
> a nul byte.
> 
> zsh% print -c '<\U0000000T>'
> <
> zsh% print '<\U0000000T>' | cat -v
> <^@T>

I don't think I followed everything here, but output in columns is yet
another of the seemingly infinite variants of print that no one bothered
to fix up when print was altered to use unmetafied bytes with a separate
length.

The same seems to be true of -o and -O; I've made a note but not altered
that.

Index: Src/builtin.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/builtin.c,v
retrieving revision 1.170
diff -u -r1.170 builtin.c
--- Src/builtin.c	11 Nov 2006 13:16:10 -0000	1.170
+++ Src/builtin.c	10 Dec 2006 23:12:52 -0000
@@ -3603,6 +3603,13 @@
     }
 
     /* -o and -O -- sort the arguments */
+    /*
+     * TODO: this appears to be yet another of the endless
+     * chunks of code that didn't get fixed up properly
+     * to reflect the fact that args contains unmetafied
+     * strings that may contain NULs with the lengths in
+     * len.
+     */
     if (OPT_ISSET(ops,'o')) {
 	if (fmt && !*args) return 0;
 	if (OPT_ISSET(ops,'i'))
@@ -3624,7 +3631,6 @@
     /* -c -- output in columns */
     if (!fmt && (OPT_ISSET(ops,'c') || OPT_ISSET(ops,'C'))) {
 	int l, nc, nr, sc, n, t, i;
-	char **ap;
 
 	if (OPT_ISSET(ops,'C')) {
 	    char *eptr, *argptr = OPT_ARG(ops,'C');
@@ -3647,13 +3653,12 @@
 
 	    /*
 	     * i: loop counter
-	     * ap: array iterator
 	     * l: maximum length seen
 	     *
 	     * Ignore lengths in last column since they don't affect
 	     * the separation.
 	     */
-	    for (i = l = 0, ap = args; *ap; ap++, i++) {
+	    for (i = l = 0; i < argc; i++) {
 		if (OPT_ISSET(ops, 'a')) {
 		    if ((i % nc) == nc - 1)
 			continue;
@@ -3661,8 +3666,8 @@
 		    if (i >= nr * (nc - 1))
 			break;
 		}
-		if (l < (t = strlen(*ap)))
-		    l = t;
+		if (l < len[i])
+		    l = len[i];
 	    }
 	    sc = l + 2;
 	}
@@ -3670,12 +3675,11 @@
 	{
 	    /*
 	     * n: loop counter
-	     * ap: array iterator
 	     * l: maximum length seen
 	     */
-	    for (n = l = 0, ap = args; *ap; ap++, n++)
-		if (l < (t = strlen(*ap)))
-		    l = t;
+	    for (n = l = 0; n < argc; n++)
+		if (l < len[n])
+		    l = len[n];
 
 	    /*
 	     * sc: column width
@@ -3689,31 +3693,31 @@
 	}
 
 	if (OPT_ISSET(ops,'a'))	/* print across, i.e. columns first */
-	    ap = args;
+	    n = 0;
 	for (i = 0; i < nr; i++) {
 	    if (OPT_ISSET(ops,'a'))
 	    {
 		int ic;
-		for (ic = 0; ic < nc && *ap; ic++, ap++)
+		for (ic = 0; ic < nc && n < argc; ic++, n++)
 		{
-		    l = strlen(*ap);
-		    fprintf(fout, "%s", *ap);
-		    if (*ap)
+		    l = len[n];
+		    fwrite(args[n], l, 1, fout);
+		    if (n < argc)
 			for (; l < sc; l++)
 			    fputc(' ', fout);
 		}
 	    }
 	    else
 	    {
-		ap = args + i;
+		n = i;
 		do {
-		    l = strlen(*ap);
-		    fprintf(fout, "%s", *ap);
-		    for (t = nr; t && *ap; t--, ap++);
-		    if (*ap)
+		    l = len[n];
+		    fwrite(args[n], l, 1, fout);
+		    for (t = nr; t && n < argc; t--, n++);
+		    if (n < argc)
 			for (; l < sc; l++)
 			    fputc(' ', fout);
-		} while (*ap);
+		} while (n < argc);
 	    }
 	    fputc(OPT_ISSET(ops,'N') ? '\0' : '\n', fout);
 	}

-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author