Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

[PATCH] fix multibyte input/mbstate_t problem



The attached patch fixes multibyte input (verified with UTF-8). As it turns out, 
mbstate_t works quite differently from what was expected :)

The patch makes it static (with implicit zero initialization). It is 
fundamentally wrong to reinitialize it on every call: mbstate_t is a function 
of all preceding input, and for shift-state encodings it also keeps the current 
shift state, among other things. It also means that, in the long run, every 
input stream must have its own mbstate_t, which is initialized when the stream 
is first opened. We need one mbstate_t for zle.

It also includes small fixes in zle_utils.c.

Editing Russian is funny; "echo xxxx" outputs the correct text, but during line 
editing the display is wrong (it counts every UTF-8 character as 2 screen 
characters).

BTW, calling getbyte from getrestchar resets lastchar_wide_valid.

-andrey

PS: Am I the only one having problems with SourceForge ssh CVS? It does not 
hang completely, but it is painfully slow.
Index: Src/Zle/zle_main.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_main.c,v
retrieving revision 1.60
diff -u -p -r1.60 zle_main.c
--- Src/Zle/zle_main.c	22 Feb 2005 13:13:05 -0000	1.60
+++ Src/Zle/zle_main.c	22 Feb 2005 21:01:07 -0000
@@ -749,10 +749,10 @@ mod_export ZLE_INT_T
 getrestchar(int inchar)
 {
     /* char cnull = '\0'; */
-    char buf[MB_CUR_MAX], *ptr;
+    char c = inchar;
     wchar_t outchar;
     int ret;
-    mbstate_t ps;
+    static mbstate_t ps;
 
     /*
      * We are guaranteed to set a valid wide last character,
@@ -764,28 +764,23 @@ getrestchar(int inchar)
     if (inchar == EOF)
 	return lastchar_wide = WEOF;
 
-    /* reset shift state by converting null */
-    /* mbrtowc(&outchar, &cnull, 1, &ps); */
-    memset (&ps, '\0', sizeof (ps));
-
-    ptr = buf;
-    *ptr++ = inchar;
     /*
      * Return may be zero if we have a NULL; handle this like
      * any other character.
      */
-    while ((ret = mbrtowc(&outchar, buf, ptr - buf, &ps)) < 0) {
+    while ((ret = mbrtowc(&outchar, &c, 1, &ps)) < 0) {
 	if (ret == -1) {
 	    /*
 	     * Invalid input.  Hmm, what's the right thing to do here?
 	     */
 	    return lastchar_wide = WEOF;
 	}
+
 	/* No timeout here as we really need the character. */
 	inchar = getbyte(0);
 	if (inchar == EOF)
 	    return lastchar_wide = WEOF;
-	*ptr++ = inchar;
+	c = inchar;
     }
     return lastchar_wide = (ZLE_INT_T)outchar;
 }
Index: Src/Zle/zle_utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_utils.c,v
retrieving revision 1.19
diff -u -p -r1.19 zle_utils.c
--- Src/Zle/zle_utils.c	22 Feb 2005 13:13:08 -0000	1.19
+++ Src/Zle/zle_utils.c	22 Feb 2005 21:01:07 -0000
@@ -116,8 +116,8 @@ zlelineasstring(ZLE_STRING_T instr, int 
 
     s = zalloc(inll * MB_CUR_MAX + 1);
 
-    for(i=0; i < inll; i++) {
-	if (outcs != NULL && i == incs)
+    for(i=0; i < inll; i++, incs--) {
+	if (outcs != NULL && incs == 0)
 	    *outcs = mb_len;
 	j = wctomb(s + mb_len, instr[i]);
 	if (j == -1) {
@@ -206,7 +206,7 @@ stringaszleline(unsigned char *instr, in
 	wchar_t *outptr = outstr;
 
 	/* mbrtowc(outstr, &cnull, 1, &ps); */
-	memset(&ps, \0, sizeof(ps));
+	memset(&ps, '\0', sizeof(ps));
 
 	while (ll) {
 	    size_t ret = mbrtowc(outptr, inptr, ll, &ps);


Messages sorted by: Reverse Date, Date, Thread, Author