Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: fix matching strings inside multibyte character.



I'm hoping the compmatch.c chunk fixes the problem that the completion
code could decide two strings were identical up to a point in the middle
of a multibyte character, causing inconsistent output.  (This probably
wasn't as bad as the case where it didn't take account of Meta bytes:
that actually caused crashes.)  I've tried it out with some Hebrew and
it seems to be in the right target area, but it's quite hairy and I'm
not 100% sure.

I'm a little surprised it just needs fixing in the one place, but that's
the only one I know about.

The other hunks are minor consistency fixes: correct a variable's type
so that a pointer to it can be passed, and remove a commented-out
variable that's no longer useful.

Index: Src/Zle/complist.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/complist.c,v
retrieving revision 1.73
diff -u -r1.73 complist.c
--- Src/Zle/complist.c	29 Sep 2005 17:32:38 -0000	1.73
+++ Src/Zle/complist.c	2 Oct 2005 18:55:49 -0000
@@ -575,7 +575,8 @@
      * ps is the shift state of the conversion to wide characters.
      */
     char *ums, *uptr, *sptr, *wptr;
-    int ret, umleft, umlen, width;
+    int ret, umleft, umlen;
+    size_t width;
     mbstate_t ps;
 
     memset(&ps, 0, sizeof(ps));
Index: Src/Zle/compmatch.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/compmatch.c,v
retrieving revision 1.43
diff -u -r1.43 compmatch.c
--- Src/Zle/compmatch.c	6 Jul 2005 10:01:25 -0000	1.43
+++ Src/Zle/compmatch.c	2 Oct 2005 18:56:09 -0000
@@ -1584,6 +1584,11 @@
 {
     int ret = 0, l, ind, add;
     char *p, *q;
+#ifdef ZLE_UNICODE_SUPPORT
+    int fulllen = len;
+    char *fullstr = str;
+    mbstate_t ps;
+#endif
 
     if (sfx) {
 	str += len;
@@ -1614,6 +1619,85 @@
 		   || (l < md->len && q[-1] == Meta)))
 		l--;
 	}
+#ifdef ZLE_UNICODE_SUPPORT
+	/*
+	 * Make sure we don't end in the middle of a multibyte character.
+	 * Don't need to do this if the match ended at the start
+	 * of the original string.
+	 *
+	 * Let q be the match point we've found.
+	 */
+	q = sfx ? str - l : str + l;
+	if (q != fullstr) {
+	    memset(&ps, 0, sizeof(ps));
+	    /*
+	     * Otherwise read characters from the start of the original
+	     * string until we reach or pass the match point.  This
+	     * is rather inefficient, but in general only reading
+	     * the full string can keep track of where we are in
+	     * a character.  With a prefix we could be more efficient,
+	     * but it's difficult with a suffix where the match point
+	     * moves backwards.
+	     */
+	    for (p = fullstr; p < fullstr + fulllen; ) {
+		wchar_t wc;
+		/*
+		 * ret must, in fact, be set by the current logic,
+		 * but gcc doesn't realise (at least some versions don't).
+		 */
+		int ret = -1, diff;
+		char *p2;
+
+		/*
+		 * Because the string is metafied, we need to
+		 * assembled wide characters a byte at a time.
+		 */
+		for (p2 = p; p2 < fullstr + fulllen; p2++) {
+		  char curchar = (*p2 == Meta) ? (*++p2 ^ 32) : *p2;
+		  ret = mbrtowc(&wc, &curchar, 1, &ps);
+		  /*
+		   * Continue while character is incomplete.
+		   */
+		  if (ret != -2)
+		    break;
+		}
+		if (ret < 0) {
+		    /* not a valid character, give up test */
+		    break;
+		}
+		/* increment p2 for last byte read */
+		diff = ++p2 - q;
+		if (diff == 0) {
+		    /*
+		     * Prefix or suffix matches at end of multbyte character,
+		     * so OK.
+		     */
+		    break;
+		} else if (diff > 0) {
+		    /*
+		     * The prefix or suffix finishes in the middle
+		     * of a character.  Shorten it until it doesn't.
+		     */
+		    if (sfx) {
+			/*
+			 * We need to remove the trailing part of
+			 * the character from the suffix.
+			 */
+			l -= diff;
+		    } else {
+			/*
+			 * We need to remove the initial part of
+			 * the character from the prefix.
+			 */
+			l -= (q - p);
+		    }
+		    break;
+		}
+		/* Advance over full character */
+		p += ret;
+	    }
+	}
+#endif
 	if (l) {
 	    /* There was a common prefix, use it. */
 	    md->len -= l; len -= l;
Index: Src/Zle/zle_main.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_main.c,v
retrieving revision 1.77
diff -u -r1.77 zle_main.c
--- Src/Zle/zle_main.c	29 Sep 2005 17:32:38 -0000	1.77
+++ Src/Zle/zle_main.c	2 Oct 2005 18:56:23 -0000
@@ -758,7 +758,6 @@
 mod_export ZLE_INT_T
 getrestchar(int inchar)
 {
-    /* char cnull = '\0'; */
     char c = inchar;
     wchar_t outchar;
     int ret, timeout;

-- 
Peter Stephenson <pws@xxxxxxxxxxxxxxxxxxxxxxxx>
Work: pws@xxxxxxx
Web: http://www.pwstephenson.fsnet.co.uk



Messages sorted by: Reverse Date, Date, Thread, Author