Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: fix the word boundary stuff



As I flagged up earlier, I think on reflection that zero-width
punctuation characters should be treated as part of a word even if
COMBINING_CHARS is off, since logically it doesn't really matter how
they're displayed.

This doesn't fix the *-match function widgets (forward-word-match and friends).

Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.184
diff -u -r1.184 utils.c
--- Src/utils.c	13 Apr 2008 16:58:42 -0000	1.184
+++ Src/utils.c	17 Apr 2008 12:06:34 -0000
@@ -3082,12 +3082,13 @@
 	    if (iswalnum(c))
 		return 1;
 	    /*
-	     * If we are handling combining characters, anything
-	     * printable with zero width needs to be considered
-	     * part of a word.
+	     * If we are handling combining characters, any punctuation
+	     * characters with zero width need to be considered part of
+	     * a word.  If we are not handling combining characters then
+	     * logically they are still part of the word, even if they
+	     * don't get displayed properly, so always do this.
 	     */
-	    if (isset(COMBININGCHARS) &&
-		iswprint(c) && wcwidth(c) == 0)
+	    if (iswpunct(c) && wcwidth(c) == 0)
 		return 1;
 	    return !!wmemchr(wordchars_wide.chars, c, wordchars_wide.len);
 
Index: Src/Zle/zle_word.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_word.c,v
retrieving revision 1.10
diff -u -r1.10 zle_word.c
--- Src/Zle/zle_word.c	13 Apr 2008 16:58:44 -0000	1.10
+++ Src/Zle/zle_word.c	17 Apr 2008 12:06:35 -0000
@@ -31,9 +31,13 @@
 #include "zle_word.pro"
 
 /*
- * HERE: our handling of combining characters may be wrong.  We
- * should make sure we only consider a combining character part of
- * a word if the base character is.
+ * In principle we shouldn't consider a zero-width punctuation
+ * character (i.e. a modifier of some sort) part of the word unless
+ * the base character is.  However, we only consider them part of
+ * a word if we so consider all alphanumerics, so the distinction
+ * only applies if the characters are modifying something they probably
+ * ought not to be modifying.  It's not really clear we need to
+ * be clever about this not very useful case.
  */
 
 /**/
@@ -147,11 +151,20 @@
     if (n < 0)
 	return 1;
     while (n--) {
-	/* HERE: the zlecs + 1 here is suspect */
-	while (zlecs != zlell && ZC_iblank(zleline[zlecs + 1]))
-	    INCCS();
-	while (zlecs != zlell && !ZC_iblank(zleline[zlecs + 1]))
-	    INCCS();
+	while (zlecs != zlell) {
+	    int pos = zlecs;
+	    INCPOS(pos);
+	    if (!ZC_iblank(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
+	while (zlecs != zlell) {
+	    int pos = zlecs;
+	    INCPOS(pos);
+	    if (ZC_iblank(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
     }
     if (zlecs != zlell && virangeflag)
 	INCCS();
@@ -173,15 +186,37 @@
     }
     while (n--) {
 	/* HERE: the zlecs + 1 here is suspect */
-	if (ZC_iblank(zleline[zlecs + 1]))
-	    while (zlecs != zlell && ZC_iblank(zleline[zlecs + 1]))
-		INCCS();
-	if (Z_vialnum(zleline[zlecs + 1]))
-	    while (zlecs != zlell && Z_vialnum(zleline[zlecs + 1]))
-		INCCS();
-	else
-	    while (zlecs != zlell && !Z_vialnum(zleline[zlecs + 1]) && !ZC_iblank(zleline[zlecs + 1]))
-		INCCS();
+	int pos;
+	while (zlecs != zlell) {
+	    pos = zlecs;
+	    INCPOS(pos);
+	    if (!ZC_inblank(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
+	if (zlecs != zlell) {
+	    pos = zlecs;
+	    INCPOS(pos);
+	    if (Z_vialnum(zleline[pos])) {
+		for (;;) {
+		    zlecs = pos;
+		    if (zlecs == zlell)
+			break;
+		    INCPOS(pos);
+		    if (!Z_vialnum(zleline[pos]))
+			break;
+		}
+	    } else {
+		for (;;) {
+		    zlecs = pos;
+		    if (zlecs == zlell)
+			break;
+		    INCPOS(pos);
+		    if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos]))
+			break;
+		}
+	    }
+	}
     }
     if (zlecs != zlell && virangeflag)
 	INCCS();
@@ -202,11 +237,20 @@
 	return ret;
     }
     while (n--) {
-	/* HERE: the zlecs - 1 here is suspect */
-	while (zlecs && !ZC_iword(zleline[zlecs - 1]))
-	    DECCS();
-	while (zlecs && ZC_iword(zleline[zlecs - 1]))
-	    DECCS();
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (ZC_iword(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (!ZC_iword(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
     }
     return 0;
 }
@@ -225,15 +269,36 @@
 	return ret;
     }
     while (n--) {
-	/* HERE: the zlecs - 1 here is suspect */
-	while (zlecs && ZC_iblank(zleline[zlecs - 1]))
-	    DECCS();
-	if (Z_vialnum(zleline[zlecs - 1]))
-	    while (zlecs && Z_vialnum(zleline[zlecs - 1]))
-		DECCS();
-	else
-	    while (zlecs && !Z_vialnum(zleline[zlecs - 1]) && !ZC_iblank(zleline[zlecs - 1]))
-		DECCS();
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (!ZC_iblank(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
+	if (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (Z_vialnum(zleline[pos])) {
+		for (;;) {
+		    zlecs = pos;
+		    if (zlecs == 0)
+			break;
+		    DECPOS(pos);
+		    if (!Z_vialnum(zleline[pos]))
+			break;
+		}
+	    } else {
+		for (;;) {
+		    zlecs = pos;
+		    if (zlecs == 0)
+			break;
+		    DECPOS(pos);
+		    if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos]))
+			break;
+		}
+	    }
+	}
     }
     return 0;
 }
@@ -252,10 +317,20 @@
 	return ret;
     }
     while (n--) {
-	while (zlecs && ZC_iblank(zleline[zlecs - 1]))
-	    DECCS();
-	while (zlecs && !ZC_iblank(zleline[zlecs - 1]))
-	    DECCS();
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (!ZC_iblank(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (ZC_iblank(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
     }
     return 0;
 }
@@ -274,10 +349,20 @@
 	return ret;
     }
     while (n--) {
-	while (zlecs && !ZC_iword(zleline[zlecs - 1]))
-	    DECCS();
-	while (zlecs && ZC_iword(zleline[zlecs - 1]))
-	    DECCS();
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (ZC_iword(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
+	while (zlecs) {
+	    int pos = zlecs;
+	    DECPOS(pos);
+	    if (!ZC_iword(zleline[pos]))
+		break;
+	    zlecs = pos;
+	}
     }
     return 0;
 }
@@ -296,14 +381,20 @@
 	return ret;
     }
     while (n--) {
-	/*
-	 * HERE: the zlecs - 1 here is suspect, and we should
-	 * do the DECCS() thing.
-	 */
-	while (x && !ZC_iword(zleline[x - 1]))
-	    x--;
-	while (x && ZC_iword(zleline[x - 1]))
-	    x--;
+	while (x) {
+	    int pos = x;
+	    DECPOS(pos);
+	    if (ZC_iword(zleline[pos]))
+		break;
+	    x = pos;
+	}
+	while (x) {
+	    int pos = x;
+	    DECPOS(pos);
+	    if (!ZC_iword(zleline[pos]))
+		break;
+	    x = pos;
+	}
     }
     backdel(zlecs - x, CUT_RAW);
     return 0;
@@ -320,18 +411,36 @@
 	return 1;
 /* this taken from "vibackwardword" */
     while (n--) {
-	/*
-	 * HERE: the zlecs - 1 here is suspect, and we should
-	 * do the DECCS() thing.
-	 */
-	while ((x > lim) && ZC_iblank(zleline[x - 1]))
-	    x--;
-	if (Z_vialnum(zleline[x - 1]))
-	    while ((x > lim) && Z_vialnum(zleline[x - 1]))
-		x--;
-	else
-	    while ((x > lim) && !Z_vialnum(zleline[x - 1]) && !ZC_iblank(zleline[x - 1]))
-		x--;
+	while (x > lim) {
+	    int pos = x;
+	    DECPOS(pos);
+	    if (!ZC_iblank(zleline[pos]))
+		break;
+	    x = pos;
+	}
+	if (x > lim) {
+	    int pos = x;
+	    DECPOS(pos);
+	    if (Z_vialnum(zleline[pos])) {
+		for (;;) {
+		    x = pos;
+		    if (x <= lim)
+			break;
+		    DECPOS(pos);
+		    if (!Z_vialnum(zleline[pos]))
+			break;
+		}
+	    } else {
+		for (;;) {
+		    x = pos;
+		    if (x <= lim)
+			break;
+		    DECPOS(pos);
+		    if (Z_vialnum(zleline[pos]) || ZC_iblank(zleline[pos]))
+			break;
+		}
+	    }
+	}
     }
     backkill(zlecs - x, CUT_FRONT);
     return 0;
@@ -352,14 +461,20 @@
 	return ret;
     }
     while (n--) {
-	/*
-	 * HERE: the zlecs - 1 here is suspect, and we should
-	 * do the DECCS() thing.
-	 */
-	while (x && !ZC_iword(zleline[x - 1]))
-	    x--;
-	while (x && ZC_iword(zleline[x - 1]))
-	    x--;
+	while (x) {
+	    int pos = x;
+	    DECPOS(pos);
+	    if (ZC_iword(zleline[x]))
+		break;
+	    x = pos;
+	}
+	while (x) {
+	    int pos = x;
+	    DECPOS(pos);
+	    if (!ZC_iword(zleline[x]))
+		break;
+	    x = pos;
+	}
     }
     backkill(zlecs - x, CUT_FRONT);
     return 0;
@@ -451,11 +566,10 @@
 	return ret;
     }
     while (n--) {
-	/* HERE: we should do the INCCS() thing */
 	while (x != zlell && !ZC_iword(zleline[x]))
-	    x++;
+	    INCPOS(x);
 	while (x != zlell && ZC_iword(zleline[x]))
-	    x++;
+	    INCPOS(x);
     }
     foredel(x - zlecs, CUT_RAW);
     return 0;
@@ -476,11 +590,10 @@
 	return ret;
     }
     while (n--) {
-	/* HERE: we should do the INCCS() thing */
 	while (x != zlell && !ZC_iword(zleline[x]))
-	    x++;
+	    INCPOS(x);
 	while (x != zlell && ZC_iword(zleline[x]))
-	    x++;
+	    INCPOS(x);
     }
     forekill(x - zlecs, CUT_RAW);
     return 0;
@@ -490,7 +603,7 @@
 int
 transposewords(UNUSED(char **args))
 {
-    int p1, p2, p3, p4, len, x = zlecs;
+    int p1, p2, p3, p4, len, x = zlecs, pos;
     ZLE_STRING_T temp, pp;
     int n = zmult;
     int neg = n < 0, ocs = zlecs;
@@ -498,28 +611,54 @@
     if (neg)
 	n = -n;
     while (n--) {
-	/*
-	 * HERE: we should do the INCCS() thing.
-	 * A great deal of the following needs rewriting.
-	 */
 	while (x != zlell && zleline[x] != ZWC('\n') && !ZC_iword(zleline[x]))
-	    x++;
+	    INCPOS(x);
 	if (x == zlell || zleline[x] == ZWC('\n')) {
 	    x = zlecs;
-	    while (x && zleline[x - 1] != ZWC('\n') && !ZC_iword(zleline[x]))
-		x--;
-	    if (!x || zleline[x - 1] == ZWC('\n'))
+	    while (x) {
+		if (ZC_iword(zleline[x]))
+		    break;
+		pos = x;
+		DECPOS(pos);
+		if (zleline[pos] == ZWC('\n'))
+		    break;
+		x = pos;
+	    }
+	    if (!x)
+		return 1;
+	    pos = x;
+	    DECPOS(pos);
+	    if (zleline[pos] == ZWC('\n'))
 		return 1;
+	    x = pos;
+	}
+	for (p4 = x; p4 != zlell && ZC_iword(zleline[p4]); INCPOS(p4))
+	    ;
+	for (p3 = p4; p3; ) {
+	    pos = p3;
+	    DECPOS(pos);
+	    if (!ZC_iword(zleline[pos]))
+		break;
+	    p3 = pos;
 	}
-	for (p4 = x; p4 != zlell && ZC_iword(zleline[p4]); p4++);
-	for (p3 = p4; p3 && ZC_iword(zleline[p3 - 1]); p3--);
 	if (!p3)
 	    return 1;
-	for (p2 = p3; p2 && !ZC_iword(zleline[p2 - 1]); p2--);
+	for (p2 = p3; p2; ) {
+	    pos = p2;
+	    DECPOS(pos);
+	    if (ZC_iword(zleline[pos]))
+		break;
+	    p2 = pos;
+	}
 	if (!p2)
 	    return 1;
-	for (p1 = p2; p1 && ZC_iword(zleline[p1 - 1]); p1--);
-
+	for (p1 = p2; p1; ) {
+	    pos = p1;
+	    DECPOS(pos);
+	    if (!ZC_iword(zleline[pos]))
+		break;
+	    p1 = pos;
+	}
 	pp = temp = (ZLE_STRING_T)zhalloc((p4 - p1)*ZLE_CHAR_SIZE);
 	len = p4 - p3;
 	ZS_memcpy(pp, zleline + p3, len);


-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070



Messages sorted by: Reverse Date, Date, Thread, Author