Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: various weirdnesses with unicode support



Peter Stephenson <pws@xxxxxxx> wrote:
> David Gómez wrote:
> > Hi Mikael ;),
> > 
> > On Sep 07 at 10:30:24, Mikael Magnusson wrote:
> > > * Having zsh in utf-8 locale but the terminal inputting for example
> > > ISO-8859-1 makes zsh enter some weird state where
> > 
> > Yep, I noticed this one too. It becomes necessary to log out; resetting
> > the terminal doesn't fix it. From your list, I think this is the
> > most serious problem.
> 
> It might be because the multibyte input state never gets reset
> (getrestchar() in zle_main.c).  How does this manifest itself?  Are you
> unable even to generate a new line?  If you can, resetting the state for
> each line would be enough.  Otherwise we could time out multibyte
> characters and return, for example, a '?'.

For example, this patch uses the existing $KEYTIMEOUT variable to time out
the remaining bytes of a multibyte character.  If mbrtowc() reports that
the character is still incomplete, but reading the next byte takes more
than $KEYTIMEOUT hundredths of a second, the character is returned as a
wide '?' and the shift state for character input is reset.

Much of the patch is adding the extra argument to getbyte to differentiate
a real EOF from a timeout.

Does this help?  I suspect we probably need something like this anyway.

Index: Src/builtin.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/builtin.c,v
retrieving revision 1.146
diff -u -r1.146 builtin.c
--- Src/builtin.c	10 Aug 2005 07:45:17 -0000	1.146
+++ Src/builtin.c	8 Sep 2005 10:24:14 -0000
@@ -4539,7 +4539,7 @@
 
 	do {
 	    if (izle) {
-		if ((val = getkeyptr(0)) < 0)
+		if ((val = getkeyptr(0, NULL)) < 0)
 		    break;
 		*bptr++ = (char) val;
 		nchars--;
@@ -4595,7 +4595,7 @@
 
 	/* get, and store, reply */
 	if (izle) {
-	    int key = getkeyptr(0);
+	    int key = getkeyptr(0, NULL);
 
 	    readbuf[0] = (key == 'y' ? 'y' : 'n');
 	} else {
@@ -4818,7 +4818,7 @@
     int ret;
 
     if (izle) {
-	int c = getkeyptr(0);
+	int c = getkeyptr(0, NULL);
 
 	return (c < 0 ? EOF : c);
     }
Index: Src/init.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/init.c,v
retrieving revision 1.56
diff -u -r1.56 init.c
--- Src/init.c	9 Aug 2005 09:33:50 -0000	1.56
+++ Src/init.c	8 Sep 2005 10:24:14 -0000
@@ -82,7 +82,7 @@
 /* Pointer to read-key function from zle */
 
 /**/
-mod_export int (*getkeyptr) _((int));
+mod_export int (*getkeyptr) _((int, int *));
 
 /* SIGCHLD mask */
 
Index: Src/Zle/zle_keymap.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_keymap.c,v
retrieving revision 1.17
diff -u -r1.17 zle_keymap.c
--- Src/Zle/zle_keymap.c	15 Aug 2005 10:01:50 -0000	1.17
+++ Src/Zle/zle_keymap.c	8 Sep 2005 10:24:14 -0000
@@ -1341,7 +1341,7 @@
 static int
 getkeybuf(int w)
 {
-    int c = getbyte(w);
+    int c = getbyte(w, NULL);
 
     if(c < 0)
 	return EOF;
Index: Src/Zle/zle_main.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_main.c,v
retrieving revision 1.73
diff -u -r1.73 zle_main.c
--- Src/Zle/zle_main.c	10 Aug 2005 10:56:41 -0000	1.73
+++ Src/Zle/zle_main.c	8 Sep 2005 10:24:14 -0000
@@ -628,13 +628,16 @@
 
 /**/
 mod_export int
-getbyte(int keytmout)
+getbyte(int keytmout, int *timeout)
 {
     char cc;
     unsigned int ret;
     int die = 0, r, icnt = 0;
     int old_errno = errno, obreaks = breaks;
 
+    if (timeout)
+	*timeout = 0;
+
 #ifdef ZLE_UNICODE_SUPPORT
     /*
      * Reading a single byte always invalidates the status
@@ -660,8 +663,12 @@
 	    dont_queue_signals();
 	    r = raw_getbyte(keytmout, &cc);
 	    restore_queue_signals(q);
-	    if (r == -2)	/* timeout */
+	    if (r == -2) {
+		/* timeout */
+		if (timeout)
+		    *timeout = 1;
 		return lastchar = EOF;
+	    }
 	    if (r == 1)
 		break;
 	    if (r == 0) {
@@ -733,7 +740,7 @@
 mod_export ZLE_INT_T
 getfullchar(int keytmout)
 {
-    int inchar = getbyte(keytmout);
+    int inchar = getbyte(keytmout, NULL);
 
 #ifdef ZLE_UNICODE_SUPPORT
     return getrestchar(inchar);
@@ -759,7 +766,7 @@
     /* char cnull = '\0'; */
     char c = inchar;
     wchar_t outchar;
-    int ret;
+    int ret, timeout;
     static mbstate_t ps;
 
     /*
@@ -784,12 +791,30 @@
 	    return lastchar_wide = WEOF;
 	}
 
-	/* No timeout here as we really need the character. */
-	inchar = getbyte(0);
+	/*
+	 * Always apply KEYTIMEOUT to the remains of the input
+	 * character.  The parts of a multibyte character should
+	 * arrive together.  If we don't do this the input can
+	 * get stuck if an invalid byte sequence arrives.
+	 */
+	inchar = getbyte(1, &timeout);
 	/* getbyte deliberately resets lastchar_wide_valid */
 	lastchar_wide_valid = 1;
-	if (inchar == EOF)
-	    return lastchar_wide = WEOF;
+	if (inchar == EOF) {
+	    if (timeout)
+	    {
+		/*
+		 * This case means that we got a valid initial byte
+		 * (since we tested for EOF above), but the followup
+		 * timed out.  This probably indicates a duff character.
+		 * Reset the shift state and return a '?'.
+		 */
+		memset(&ps, 0, sizeof(ps));
+		lastchar_wide = L'?';
+	    }
+	    else
+		return lastchar_wide = WEOF;
+	}
 	c = inchar;
     }
     return lastchar_wide = (ZLE_INT_T)outchar;
Index: Src/Zle/zle_misc.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_misc.c,v
retrieving revision 1.26
diff -u -r1.26 zle_misc.c
--- Src/Zle/zle_misc.c	15 Aug 2005 15:47:54 -0000	1.26
+++ Src/Zle/zle_misc.c	8 Sep 2005 10:24:14 -0000
@@ -595,7 +595,7 @@
      *
      * Hence for now this remains byte-by-byte.
      */
-    while ((gotk = getbyte(0)) != EOF) {
+    while ((gotk = getbyte(0, NULL)) != EOF) {
 	if (gotk == '-' && !digcnt) {
 	    minus = -1;
 	    digcnt++;
Index: Src/Zle/zle_vi.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_vi.c,v
retrieving revision 1.10
diff -u -r1.10 zle_vi.c
--- Src/Zle/zle_vi.c	17 Aug 2005 19:26:03 -0000	1.10
+++ Src/Zle/zle_vi.c	8 Sep 2005 10:24:14 -0000
@@ -108,7 +108,7 @@
     char m[3], *str;
     Thingy cmd;
 
-    if(getbyte(0) == EOF)
+    if (getbyte(0, NULL) == EOF)
 	return ZLEEOF;
 
     m[0] = lastchar;


-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070


**********************************************************************
This email and any files transmitted with it are confidential and
intended solely for the use of the individual or entity to whom they
are addressed. If you have received this email in error please notify
the system manager.

**********************************************************************



Messages sorted by: Reverse Date, Date, Thread, Author