Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: problem in prompt in utf-8



Peter Stephenson wrote:
> This should fix the immediate problem, but note that the width of the
> prompt isn't calculated correctly yet: we don't scan prompts for
> multibyte characters.  Hence you might see oddities with the display
> since the shell doesn't know the position of the cursor after the
> prompt.  This is another thing on the list of fixes needed in zle.  (It
> should come under the "not rocket science" heading, unlike the
> completion code, so I hope it will be fixed relatively soon.)

Yeah.

I think this does the trick.  It relies on the fact that we usually print
out the prompt completely, so we don't need to convert it to a wide
character array, just count the characters in it.  We do this because
prompts can have zero-width characters such as terminal escapes.  There
was an optimisation that we could assume everything was hunky dory if
the width was the same as the length of the prompt, but I don't think
that works any more now I'm using wcwidth() for characters in the
prompt.

This may not be rocket science, but it's not trivial either, so there
could well be glitches.

The truncation code (stuff like "%12<...<") doesn't handle multibyte
characters properly yet.  Also, I didn't put wcwidth() anywhere other
than in the prompt width calculation, so characters in the editor
buffers are still assumed to have screen width 1.

Index: Src/prompt.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/prompt.c,v
retrieving revision 1.23
diff -u -r1.23 prompt.c
--- Src/prompt.c	9 Sep 2004 10:12:47 -0000	1.23
+++ Src/prompt.c	17 Sep 2005 21:25:28 -0000
@@ -804,10 +804,15 @@
     return 0;
 }
 
-/* Count height etc. of a prompt string returned by promptexpand(). *
- * This depends on the current terminal width, and tabs and         *
- * newlines require nontrivial processing.                          *
- * Passing `overf' as -1 means to ignore columns (absolute width).  */
+/*
+ * Count height etc. of a prompt string returned by promptexpand().
+ * This depends on the current terminal width, and tabs and
+ * newlines require nontrivial processing.
+ * Passing `overf' as -1 means to ignore columns (absolute width).
+ *
+ * If multibyte is enabled, take account of multibyte characters
+ * by counting 1 for each.
+ */
 
 /**/
 mod_export void
@@ -815,29 +820,96 @@
 {
     int w = 0, h = 1;
     int s = 1;
-    for(; *str; str++) {
-	if(w >= columns && overf >= 0) {
+#ifdef ZLE_UNICODE_SUPPORT
+    int mbret, wcw, multi = 0;
+    char inchar;
+    mbstate_t mbs;
+    wchar_t wc;
+
+    memset(&mbs, 0, sizeof(mbs));
+#endif
+
+    for (; *str; str++) {
+	if (w >= columns && overf >= 0) {
 	    w = 0;
 	    h++;
 	}
-	if(*str == Meta)
-	    str++;
-	if(*str == Inpar)
+	/*
+	 * Input string should be metafied, so tokens in it should
+	 * be real tokens, even if there are multibyte characters.
+	 */
+	if (*str == Inpar)
 	    s = 0;
-	else if(*str == Outpar)
+	else if (*str == Outpar)
 	    s = 1;
-	else if(*str == Nularg)
+	else if (*str == Nularg)
 	    w++;
-	else if(s) {
-	    if(*str == '\t')
-		w = (w | 7) + 1;
-	    else if(*str == '\n') {
-		w = 0;
-		h++;
-	    } else
-		w++;
+	else if (s) {
+	    if (*str == Meta) {
+#ifdef ZLE_UNICODE_SUPPORT
+		inchar = *++str ^ 32;
+#else
+		str++;
+#endif
+	    } else {
+#ifdef ZLE_UNICODE_SUPPORT
+		/*
+		 * Don't look for tab or newline in the middle
+		 * of a multibyte character.  Otherwise, we are
+		 * relying on the character set being an extension
+		 * of ASCII so it's safe to test a single byte.
+		 */
+		if (multi) {
+#endif
+		    if (*str == '\t') {
+			w = (w | 7) + 1;
+			continue;
+		    } else if (*str == '\n') {
+			w = 0;
+			h++;
+			continue;
+		    }
+#ifdef ZLE_UNICODE_SUPPORT
+		}
+
+		inchar = *str;
+#endif
+	    }
+
+#ifdef ZLE_UNICODE_SUPPORT
+	    mbret = mbrtowc(&wc, &inchar, 1, &mbs);
+	    if (mbret >= -1) {
+		if (mbret > 0) {
+		    /*
+		     * If the character isn't printable, this returns -1.
+		     */
+		    wcw = wcwidth(wc);
+		    if (wcw > 0)
+			w += wcw;
+		}
+		/*
+		 * else invalid character or possibly null: assume no
+		 * output
+		 */
+		multi = 0;
+	    } else {
+		/* else character is incomplete, keep looking. */
+		multi = 1;
+	    }
+#else
+	    w++;
+#endif
 	}
     }
+#ifdef ZLE_UNICODE_SUPPORT
+    if (multi) {
+	/*
+	 * oops: incomplete multibyte character.  assume we get a funny
+	 * glyph for single screen column.
+	 */
+	w++;
+    }
+#endif
     if(w >= columns && overf >= 0) {
 	if (!overf || w > columns) {
 	    w = 0;
@@ -901,12 +973,15 @@
 	countprompt(ptr, &w, 0, -1);
 	if (w > trunclen) {
 	    /*
-	     * We need to truncate.  t points to the truncation string -- *
-	     * which is inserted literally, without nice representation.  *
-	     * tlen is its length, and maxlen is the amount of the main	  *
-	     * string that we want to keep.  Note that if the truncation  *
-	     * string is longer than the truncation length (tlen >	  *
-	     * trunclen), the truncation string is used in full.	  *
+	     * We need to truncate.  t points to the truncation string --
+	     * which is inserted literally, without nice representation.
+	     * tlen is its length, and maxlen is the amount of the main
+	     * string that we want to keep.  Note that if the truncation
+	     * string is longer than the truncation length (tlen >
+	     * trunclen), the truncation string is used in full.
+	     *
+	     * TODO: we don't take account of multibyte characters
+	     * in the string we're truncating.
 	     */
 	    char *t = truncstr;
 	    int fullen = bp - ptr;
Index: Src/Zle/zle_refresh.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_refresh.c,v
retrieving revision 1.27
diff -u -r1.27 zle_refresh.c
--- Src/Zle/zle_refresh.c	15 Aug 2005 17:30:58 -0000	1.27
+++ Src/Zle/zle_refresh.c	17 Sep 2005 21:25:57 -0000
@@ -30,7 +30,13 @@
 #include "zle.mdh"
 #include "zle_refresh.pro"
 
-/* Expanded prompts */
+/*
+ * Expanded prompts.
+ *
+ * These are always output from the start, except in the special
+ * case where we are sure each character in the prompt corresponds
+ * to a character on screen.
+ */
 
 /**/
 char *lpromptbuf, *rpromptbuf;
@@ -202,7 +208,9 @@
 	}
     }
 
-    /* TODO currently zsh core is not using widechars */
+    /*
+     * countprompt() now correctly handles multibyte input.
+     */
     countprompt(lpromptbuf, &lpromptwof, &lprompth, 1);
     countprompt(rpromptbuf, &rpromptw, &rprompth, 0);
     if (lpromptwof != winw)
@@ -312,7 +320,11 @@
     oxtabs,			/* oxtabs - tabs expand to spaces if set    */
     numscrolls, onumscrolls;
 
-/* TODO currently it assumes sceenwidth 1 for every character */
+/*
+ * TODO currently it assumes sceenwidth 1 for every character
+ * (except for characters in the prompt which are correctly handled
+ * by wcwidth()).
+ */
 /**/
 mod_export void
 zrefresh(void)
@@ -449,7 +461,7 @@
         if (termflags & TERM_SHORT)
             vcs = 0;
         else if (!clearflag && lpromptbuf[0]) {
-            zputs(lpromptbuf, shout);	/* TODO convert to wide characters */
+            zputs(lpromptbuf, shout);
 	    if (lpromptwof == winw)
 		zputs("\n", shout);	/* works with both hasam and !hasam */
 	} else {
@@ -622,7 +634,6 @@
 	if (trashedzle && opts[TRANSIENTRPROMPT])
 	    put_rpmpt = 0;
 	else
-	    /* TODO (r)promptbuf will be widechar */
 	    put_rpmpt = rprompth == 1 && rpromptbuf[0] &&
 		!strchr(rpromptbuf, '\t') &&
 		(int)ZS_strlen(nbuf[0]) + rpromptw < winw - 1;
@@ -677,7 +688,6 @@
     /* output the right-prompt if appropriate */
 	if (put_rpmpt && !ln && !oput_rpmpt) {
 	    moveto(0, winw - 1 - rpromptw);
-	    /* TODO it will be wide char at some point */
 	    zputs(rpromptbuf, shout);
 	    vcs = winw - 1;
 	/* reset character attributes to that set by the main prompt */
@@ -1114,11 +1124,28 @@
 
 /* otherwise _carefully_ write the contents of the video buffer.
    if we're anywhere in the prompt, goto the left column and write the whole
-   prompt out unless ztrlen(lpromptbuf) == lpromptw : we can cheat then */
+   prompt out.
+
+   If strlen(lpromptbuf) == lpromptw, we can cheat and output
+   the appropriate chunk of the string.  This test relies on the
+   fact that any funny business will always make the length of
+   the string larger than the printing width, so if they're the same
+   we have only ASCII characters or a single-byte extension of ASCII.
+   Unfortunately this trick won't work if there are potentially
+   characters occupying more than one column.  We could flag that
+   this has happened (since it's not that common to have characters
+   wider than one column), but for now it's easier not to use the
+   trick if we are using wcwidth() on the prompt.  It's not that
+   common to be editing in the middle of the prompt anyway, I would
+   think.
+   */
     if (vln == 0 && i < lpromptw && !(termflags & TERM_SHORT)) {
+#ifndef ZLE_UNICODE_SUPPORT
 	if ((int)strlen(lpromptbuf) == lpromptw)
 	    fputs(lpromptbuf + i, shout);
-	else if (tccan(TCRIGHT) && (tclen[TCRIGHT] * ct <= ztrlen(lpromptbuf)))
+	else 
+#endif
+	if (tccan(TCRIGHT) && (tclen[TCRIGHT] * ct <= ztrlen(lpromptbuf)))
 	    /* it is cheaper to send TCRIGHT than reprint the whole prompt */
 	    for (ct = lpromptw - i; ct--; )
 		tcout(TCRIGHT);
@@ -1126,7 +1153,7 @@
 	    if (i != 0)
 		zputc('\r');
 	    tc_upcurs(lprompth - 1);
-	    zputs(lpromptbuf, shout); /* TODO wide character */
+	    zputs(lpromptbuf, shout);
 	    if (lpromptwof == winw)
 		zputs("\n", shout);	/* works with both hasam and !hasam */
 	}
@@ -1238,9 +1265,6 @@
     /*
      * Convert the entire lprompt so that we know how to count
      * characters.
-     *
-     * TODO screen widths are still not correct, indeed lpromptw knows
-     * nothing about multibyte characters so may be too long.
      */
     lpend = strchr(lpromptbuf, 0);
     /* Worst case number of characters, not null-terminated */
@@ -1258,6 +1282,7 @@
 	    /* dunno, try to recover */
 	    lpptr++;
 	    *lpwp++ = ZWC('?');
+	    memset(&ps, '\0', sizeof(ps));
 	}
     }
     if (lpwp - lpwbuf < lpromptw) {

-- 
Peter Stephenson <pws@xxxxxxxxxxxxxxxxxxxxxxxx>
Work: pws@xxxxxxx
Web: http://www.pwstephenson.fsnet.co.uk



Messages sorted by: Reverse Date, Date, Thread, Author