Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: [BUG] SIGSEGV under certain circumstances



On Mar 6,  9:47am, Peter Stephenson wrote:
} Subject: Re: [BUG] SIGSEGV under certain circumstances
}
} Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
} > Am I going astray here?
} 
} I don't understand your point since the functions being called here are
} all adapted for metafied multibyte characters, but I'm obviously missing
} something.

No, it's me that's missing something.  The patch I sent in 40754 is more
complicated than necessary, because I didn't bother to check whether the
word "meta" in the name "mb_metacharlenconv" actually meant that it was
handling zsh metafied pairs.

At the same time, this still means that pattern_match() was using the
unmetafied wide characters to do comparisons, but cfp_matcher_pats()
was counting bytes as if there were at most one metafied pair making
up the whole of each character.

There's also the question of whether there is one entry in the "ms"
array of Cmatcher for every byte in the input string (including the
Meta bytes), or one entry for every character, or (the worst case)
one entry for every byte-or-metafied-pair of which there may be more
than one of each per wide character.

I think the answer is that since the ms array is being filled here to
be passed down to cfp_matcher_range(), it should be one entry for each
character.  cfp_matcher_range() does mp++ at the end of its loop.  So
incrementing mp by one each time in cfp_matcher_pats() must also be
the right thing.

Here's a corrected patch with unmeta_one() simplified.  Again this is
against the master, not on top of previous patches.  I'm reasonably
confident in this now, so inclined to commit and wait for explosions.
Y02compmatch passes, so I don't think anything basic is broken.

diff --git a/Src/Zle/compmatch.c b/Src/Zle/compmatch.c
index aedf463..1cdbb8a 100644
--- a/Src/Zle/compmatch.c
+++ b/Src/Zle/compmatch.c
@@ -1548,27 +1548,11 @@ pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
 {
     convchar_t c, wc;
     convchar_t ind, wind;
-    int len = 0, wlen, mt, wmt;
-#ifdef MULTIBYTE_SUPPORT
-    mbstate_t lstate, wstate;
-
-    memset(&lstate, 0, sizeof(lstate));
-    memset(&wstate, 0, sizeof(wstate));
-#endif
+    int len = 0, wlen = 0, mt, wmt;
 
     while (p && wp && *s && *ws) {
 	/* First test the word character */
-#ifdef MULTIBYTE_SUPPORT
-	wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
-#else
-	if (*ws == Meta) {
-	    wc = STOUC(ws[1]) ^ 32;
-	    wlen = 2;
-	} else {
-	    wc = STOUC(*ws);
-	    wlen = 1;
-	}
-#endif
+	wc = unmeta_one(ws, &wlen);
 	wind = pattern_match1(wp, wc, &wmt);
 	if (!wind)
 	    return 0;
@@ -1576,18 +1560,7 @@ pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
 	/*
 	 * Now the line character.
 	 */
-#ifdef MULTIBYTE_SUPPORT
-	len = mb_metacharlenconv_r(s, &c, &lstate);
-#else
-	/* We have the character itself. */
-	if (*s == Meta) {
-	    c = STOUC(s[1]) ^ 32;
-	    len = 2;
-	} else {
-	    c = STOUC(*s);
-	    len = 1;
-	}
-#endif
+	c = unmeta_one(s, &len);
 	/*
 	 * If either is "?", they match each other; no further tests.
 	 * Apply this even if the character wasn't convertable;
@@ -1627,17 +1600,7 @@ pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
     }
 
     while (p && *s) {
-#ifdef MULTIBYTE_SUPPORT
-	len = mb_metacharlenconv_r(s, &c, &lstate);
-#else
-	if (*s == Meta) {
-	    c = STOUC(s[1]) ^ 32;
-	    len = 2;
-	} else {
-	    c = STOUC(*s);
-	    len = 1;
-	}
-#endif
+	c = unmeta_one(s, &len);
 	if (!pattern_match1(p, c, &mt))
 	    return 0;
 	p = p->next;
@@ -1645,17 +1608,7 @@ pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
     }
 
     while (wp && *ws) {
-#ifdef MULTIBYTE_SUPPORT
-	wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
-#else
-	if (*ws == Meta) {
-	    wc = STOUC(ws[1]) ^ 32;
-	    wlen = 2;
-	} else {
-	    wc = STOUC(*ws);
-	    wlen = 1;
-	}
-#endif
+	wc = unmeta_one(ws, &wlen);
 	if (!pattern_match1(wp, wc, &wmt))
 	    return 0;
 	wp = wp->next;
diff --git a/Src/Zle/computil.c b/Src/Zle/computil.c
index 325da6d..e704f9f 100644
--- a/Src/Zle/computil.c
+++ b/Src/Zle/computil.c
@@ -4465,17 +4465,24 @@ cfp_matcher_pats(char *matcher, char *add)
     if (m && m != pcm_err) {
 	char *tmp;
 	int al = strlen(add), zl = ztrlen(add), tl, cl;
-	VARARR(Cmatcher, ms, zl);
+	VARARR(Cmatcher, ms, zl);	/* One Cmatcher per character */
 	Cmatcher *mp;
 	Cpattern stopp;
 	int stopl = 0;
 
+	/* zl >= (number of wide characters) is guaranteed */
 	memset(ms, 0, zl * sizeof(Cmatcher));
 
 	for (; m && *add; m = m->next) {
 	    stopp = NULL;
 	    if (!(m->flags & (CMF_LEFT|CMF_RIGHT))) {
 		if (m->llen == 1 && m->wlen == 1) {
+		    /*
+		     * In this loop and similar loops below we step
+		     * through tmp one (possibly wide) character at a
+		     * time.  pattern_match() compares only the first
+		     * character using unmeta_one() so keep in step.
+		     */
 		    for (tmp = add, tl = al, mp = ms; tl; ) {
 			if (pattern_match(m->line, tmp, NULL, NULL)) {
 			    if (*mp) {
@@ -4485,10 +4492,10 @@ cfp_matcher_pats(char *matcher, char *add)
 			    } else
 				*mp = m;
 			}
-			cl = (*tmp == Meta) ? 2 : 1;
+			(void) unmeta_one(tmp, &cl);
 			tl -= cl;
 			tmp += cl;
-			mp += cl;
+			mp++;
 		    }
 		} else {
 		    stopp = m->line;
@@ -4505,10 +4512,10 @@ cfp_matcher_pats(char *matcher, char *add)
 			    } else
 				*mp = m;
 			}
-			cl = (*tmp == Meta) ? 2 : 1;
+			(void) unmeta_one(tmp, &cl);
 			tl -= cl;
 			tmp += cl;
-			mp += cl;
+			mp++;
 		    }
 		} else if (m->llen) {
 		    stopp = m->line;
@@ -4531,7 +4538,7 @@ cfp_matcher_pats(char *matcher, char *add)
 			al = tmp - add;
 			break;
 		    }
-		    cl = (*tmp == Meta) ? 2 : 1;
+		    (void) unmeta_one(tmp, &cl);
 		    tl -= cl;
 		    tmp += cl;
 		}
diff --git a/Src/utils.c b/Src/utils.c
index 7f3ddad..839575b 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4788,6 +4788,48 @@ unmeta(const char *file_name)
 }
 
 /*
+ * Unmetafy just one character and store the number of bytes it occupied.
+ */
+/**/
+mod_export convchar_t
+unmeta_one(const char *in, int *sz)
+{
+    convchar_t wc;
+    int newsz;
+#ifdef MULTIBYTE_SUPPORT
+    int ulen;
+    mbstate_t wstate;
+#endif
+
+    if (!sz)
+	sz = &newsz;
+    *sz = 0;
+
+    if (!in || !*in)
+	return 0;
+
+#ifdef MULTIBYTE_SUPPORT
+    memset(&wstate, 0, sizeof(wstate));
+    ulen = mb_metacharlenconv_r(in, &wc, &wstate);
+    while (ulen-- > 0) {
+	if (in[*sz] == Meta)
+	    *sz += 2;
+	else
+	    *sz += 1;
+    }
+#else
+    if (in[0] == Meta) {
+      *sz = 2;
+      wc = STOUC(in[1] ^ 32);
+    } else {
+      *sz = 1;
+      wc = STOUC(in[0]);
+    }
+#endif
+    return wc;
+}
+
+/*
  * Unmetafy and compare two strings, comparing unsigned character values.
  * "a\0" sorts after "a".
  *



Messages sorted by: Reverse Date, Date, Thread, Author