Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Substitution ${...///} slows down when certain UTF character occurs



This uses the new interface.  I haven't done any testing apart from the
normal test suite.

There's one change I still want to make, which is to put the remaining
allocatiion onto the heap --- it should be used infrequently enough to
make this feasible.  However, it's probably seeing what the effects are
before that.

There's a reasonable chance there's still some glitch with metafied
characters.  We could probably do with some more parameter substitution
tests involving Mikael's interesting characters, possibly more failure
cases, too.

pws

diff --git a/Src/glob.c b/Src/glob.c
index 8bf7352..0594f0a 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2450,29 +2450,46 @@ matchpat(char *a, char *b)
 /* please do not laugh at this code. */
 
 /* Having found a match in getmatch, decide what part of string
- * to return.  The matched part starts b characters into string s
- * and finishes e characters in: 0 <= b <= e <= strlen(s)
+ * to return.  The matched part starts b characters into string imd->ustr
+ * and finishes e characters in: 0 <= b <= e <= imd->ulen on input
  * (yes, empty matches should work).
- * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards;
- * the lower parts are ignored.
- * replstr is the replacement string for a substitution
+ *
+ * imd->flags is a set of the SUB_* matches defined in zsh.h from
+ * SUB_MATCH onwards; the lower parts are ignored.
+ *
+ * imd->replstr is the replacement string for a substitution
+ *
+ * imd->replstr is metafied and the values put in imd->repllist are metafied.
  */
 
 /**/
 static char *
-get_match_ret(char *s, int b, int e, int fl, char *replstr,
-	      LinkList repllist)
+get_match_ret(Imatchdata imd, int b, int e)
 {
-    char buf[80], *r, *p, *rr;
-    int ll = 0, l = strlen(s), bl = 0, t = 0, i;
-
+    char buf[80], *r, *p, *rr, *replstr = imd->replstr;
+    int ll = 0, bl = 0, t = 0, add = 0, fl = imd->flags, i;
+
+    /* Account for b and e referring to unmetafied string */
+    for (p = imd->ustr; p < imd->ustr + b; p++)
+	if (imeta(*p))
+	    add++;
+    b += add;
+    for (; p < imd->ustr + e; p++)
+	if (imeta(*p))
+	    add++;
+    e += add;
+    for (; p < imd->ustr + imd->ulen; p++)
+	if (imeta(*p))
+	    add++;
+
+    /* Everything now refers to meatfied lengths. */
     if (replstr || (fl & SUB_LIST)) {
 	if (fl & SUB_DOSUBST) {
 	    replstr = dupstring(replstr);
 	    singsub(&replstr);
 	    untokenize(replstr);
 	}
-	if ((fl & (SUB_GLOBAL|SUB_LIST)) && repllist) {
+	if ((fl & (SUB_GLOBAL|SUB_LIST)) && imd->repllist) {
 	    /* We are replacing the chunk, just add this to the list */
 	    Repldata rd = (Repldata)
 		((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd)));
@@ -2480,30 +2497,32 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
 	    rd->e = e;
 	    rd->replstr = replstr;
 	    if (fl & SUB_LIST)
-		zaddlinknode(repllist, rd);
+		zaddlinknode(imd->repllist, rd);
 	    else
-		addlinknode(repllist, rd);
-	    return s;
+		addlinknode(imd->repllist, rd);
+	    return imd->mstr;
 	}
 	ll += strlen(replstr);
     }
     if (fl & SUB_MATCH)			/* matched portion */
 	ll += 1 + (e - b);
     if (fl & SUB_REST)		/* unmatched portion */
-	ll += 1 + (l - (e - b));
+	ll += 1 + (imd->mlen - (e - b));
     if (fl & SUB_BIND) {
 	/* position of start of matched portion */
-	sprintf(buf, "%d ", MB_METASTRLEN2END(s, 0, s+b) + 1);
+	sprintf(buf, "%d ", MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+b) + 1);
 	ll += (bl = strlen(buf));
     }
     if (fl & SUB_EIND) {
 	/* position of end of matched portion */
-	sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s, 0, s+e) + 1);
+	sprintf(buf + bl, "%d ",
+		MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+e) + 1);
 	ll += (bl = strlen(buf));
     }
     if (fl & SUB_LEN) {
 	/* length of matched portion */
-	sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s+b, 0, s+e));
+	sprintf(buf + bl, "%d ", MB_METASTRLEN2END(imd->mstr+b, 0,
+						   imd->mstr+e));
 	ll += (bl = strlen(buf));
     }
     if (bl)
@@ -2513,7 +2532,7 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
 
     if (fl & SUB_MATCH) {
 	/* copy matched portion to new buffer */
-	for (i = b, p = s + b; i < e; i++)
+	for (i = b, p = imd->mstr + b; i < e; i++)
 	    *rr++ = *p++;
 	t = 1;
     }
@@ -2523,12 +2542,12 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr,
 	if (t)
 	    *rr++ = ' ';
 	/* there may be unmatched bits at both beginning and end of string */
-	for (i = 0, p = s; i < b; i++)
+	for (i = 0, p = imd->mstr; i < b; i++)
 	    *rr++ = *p++;
 	if (replstr)
 	    for (p = replstr; *p; )
 		*rr++ = *p++;
-	for (i = e, p = s + e; i < l; i++)
+	for (i = e, p = imd->mstr + e; i < imd->mlen; i++)
 	    *rr++ = *p++;
 	t = 1;
     }
@@ -2710,26 +2729,18 @@ set_pat_end(Patprog p, char null_me)
 
 /*
  * Increment *tp over character which may be multibyte.
- * Return number of bytes that remain in the character after unmetafication.
+ * Return number of bytes.
+ * All unmetafied here.
  */
 
 /**/
-static int iincchar(char **tp)
+static int iincchar(char **tp, int left)
 {
     char *t = *tp;
-    int mbclen = mb_metacharlenconv(t, NULL);
-    int umlen = 0;
-
-    while (mbclen--) {
-	umlen++;
-	if (*t++ == Meta) {
-	    t++;
-	    mbclen--;
-	}
-    }
-    *tp = t;
+    int mbclen = mb_charlenconv(t, left, NULL);
+    *tp = t + mbclen;
 
-    return umlen;
+    return mbclen;
 }
 
 /**/
@@ -2737,7 +2748,7 @@ static int
 igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	  LinkList *repllistp)
 {
-    char *s = *sp, *t, *tmatch;
+    char *s = *sp, *t, *tmatch, *send;
     /*
      * Note that ioff counts (possibly multibyte) characters in the
      * character set (Meta's are not included), while l counts characters in
@@ -2752,36 +2763,52 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
      */
     int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp);
     int umlen, nmatches;
-    /*
-     * List of bits of matches to concatenate with replacement string.
-     * The data is a struct repldata.  It is not used in cases like
-     * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
-     * is anchored.  It goes on the heap.
-     */
-    LinkList repllist = NULL;
+    struct patstralloc patstralloc;
+    struct imatchdata imd;
+
+    (void)patallocstr(p, s, l, umltot, 1, &patstralloc);
+    s = patstralloc.alloced;
+    DPUTS(!s, "forced patallocstr failed");
+    send = s + umltot;
+
+    imd.mstr = *sp;
+    imd.mlen = l;
+    imd.ustr = s;
+    imd.ulen = umltot;
+    imd.flags = fl;
+    imd.replstr = replstr;
+    imd.repllist = NULL;
 
     /* perform must-match test for complex closures */
     if (p->mustoff)
     {
-	/*
-	 * Yuk.  Probably we should rewrite this whole function to
-	 * use an unmetafied test string.
-	 *
-	 * Use META_HEAPDUP because we need a terminating NULL.
-	 */
-	char *muststr = metafy((char *)p + p->mustoff,
-			       p->patmlen, META_HEAPDUP);
+	char *muststr = (char *)p + p->mustoff;
 
-	if (!strstr(s, muststr))
-	    matched = 0;
+	matched = 0;
+	if (p->patmlen <= umltot)
+	{
+	    for (t = s; t <= send - p->patmlen; t++)
+	    {
+		if (!memcmp(muststr, t, p->patmlen)) {
+		    matched = 1;
+		    break;
+		}
+	    }
+	}
     }
 
     /* in case we used the prog before... */
     p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
 
     if (fl & SUB_ALL) {
-	int i = matched && pattrylen(p, s, -1, -1, NULL, 0);
-	*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
+	int i = matched && pattrylen(p, s, umltot, 0, &patstralloc, 0);
+	if (!i) {
+	    /* Perform under no-match conditions */
+	    umltot = 0;
+	    imd.replstr = NULL;
+	}
+	*sp = get_match_ret(&imd, 0, umltot);
+	patfreestr(&patstralloc);
 	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
 	    return 0;
 	return 1;
@@ -2809,25 +2836,27 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	     * Largest/smallest possible match at head of string.
 	     * First get the longest match...
 	     */
-	    if (pattrylen(p, s, -1, -1, NULL, 0)) {
-		/* patmatchlen returns metafied length, as we need */
+	    if (pattrylen(p, s, umltot, 0, &patstralloc, 0)) {
+		/* patmatchlen returns unmetafied length in this case */
 	        int mlen = patmatchlen();
 		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
+		    send = s + mlen;
 		    /*
 		     * ... now we know whether it's worth looking for the
 		     * shortest, which we do by brute force.
 		     */
 		    mb_charinit();
-		    for (t = s, umlen = 0; t < s + mlen; ) {
+		    for (t = s, umlen = 0; t < send; ) {
 			set_pat_end(p, *t);
-			if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
+			if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
 			    mlen = patmatchlen();
 			    break;
 			}
-			umlen += iincchar(&t);
+			umlen += iincchar(&t, send - t);
 		    }
 		}
-		*sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
+		*sp = get_match_ret(&imd, 0, mlen);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2845,20 +2874,23 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	     */
 	    mb_charinit();
 	    tmatch = NULL;
-	    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+	    for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, s + l - t, umlen, NULL, ioff))
+		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff))
 		    tmatch = t;
 		if (fl & SUB_START)
 		    break;
-		umlen -= iincchar(&t);
+		umlen -= iincchar(&t, send - t);
 	    }
 	    if (tmatch) {
-		*sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL);
+		*sp = get_match_ret(&imd, tmatch - s, umltot);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
-	    if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
-		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+	    if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
+					       &patstralloc, ioff)) {
+		*sp = get_match_ret(&imd, umltot, umltot);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2868,18 +2900,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	     * move forward along string until we get a match. *
 	     * Again there's no optimisation.                  */
 	    mb_charinit();
-	    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+	    for (ioff = 0, t = s, umlen = umltot; t < send ; ioff++) {
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
-		    *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
+		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
+		    *sp = get_match_ret(&imd, t-s, umltot);
+		    patfreestr(&patstralloc);
 		    return 1;
 		}
 		if (fl & SUB_START)
 		    break;
-		umlen -= iincchar(&t);
+		umlen -= iincchar(&t, send - t);
 	    }
-	    if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) {
-		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+	    if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
+					       &patstralloc, ioff)) {
+		*sp = get_match_ret(&imd, umltot, umltot);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2887,18 +2922,20 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	case SUB_SUBSTR:
 	    /* Smallest at start, but matching substrings. */
 	    set_pat_start(p, l);
-	    if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) &&
+	    if (!(fl & SUB_GLOBAL) &&
+		pattrylen(p, send, 0, 0, &patstralloc, 0) &&
 		!--n) {
-		*sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
+		*sp = get_match_ret(&imd, 0, 0);
+		patfreestr(&patstralloc);
 		return 1;
 	    } /* fall through */
 	case (SUB_SUBSTR|SUB_LONG):
 	    /* longest or smallest at start with substrings */
 	    t = s;
 	    if (fl & SUB_GLOBAL) {
-		repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
+		imd.repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
 		if (repllistp)
-		     *repllistp = repllist;
+		     *repllistp = imd.repllist;
 	    }
 	    ioff = 0;		/* offset into string */
 	    umlen = umltot;
@@ -2906,10 +2943,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    do {
 		/* loop over all matches for global substitution */
 		matched = 0;
-		for (; t < s + l; ioff++) {
+		for (; t < send; ioff++) {
 		    /* Find the longest match from this position. */
 		    set_pat_start(p, t-s);
-		    if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+		    if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
 			char *mpos = t + patmatchlen();
 			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
 			    char *ptr;
@@ -2923,19 +2960,18 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 			     */
 			    for (ptr = t, umlen2 = 0; ptr < mpos;) {
 				set_pat_end(p, *ptr);
-				if (pattrylen(p, t, ptr - t, umlen2,
-					      NULL, ioff)) {
+				if (pattrylen(p, t, umlen2, 0,
+					      &patstralloc, ioff)) {
 				    mpos = t + patmatchlen();
 				    break;
 				}
-				umlen2 += iincchar(&ptr);
+				umlen2 += iincchar(&ptr, mpos - ptr);
 			    }
 			}
 			if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
-			    *sp = get_match_ret(*sp, t-s, mpos-s, fl,
-						replstr, repllist);
+			    *sp = get_match_ret(&imd, t-s, mpos-s);
 			    if (mpos == t)
-				mpos += mb_metacharlenconv(mpos, NULL);
+				mpos += mb_charlenconv(mpos, send - mpos, NULL);
 			}
 			if (!(fl & SUB_GLOBAL)) {
 			    if (n) {
@@ -2945,9 +2981,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 				 * the next character, even if it overlaps
 				 * with what we just found.
 				 */
-				umlen -= iincchar(&t);
+				umlen -= iincchar(&t, send - t);
 				continue;
 			    } else {
+				patfreestr(&patstralloc);
 				return 1;
 			    }
 			}
@@ -2958,11 +2995,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 			matched = 1;
 			while (t < mpos) {
 			    ioff++;
-			    umlen -= iincchar(&t);
+			    umlen -= iincchar(&t, send - t);
 			}
 			break;
 		    }
-		    umlen -= iincchar(&t);
+		    umlen -= iincchar(&t, send - t);
 		}
 	    } while (matched);
 	    /*
@@ -2972,8 +3009,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	     */
 	    set_pat_start(p, l);
 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
-		pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) {
-		*sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
+		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+		*sp = get_match_ret(&imd, 0, 0);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2983,8 +3021,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    /* Longest/shortest at end, matching substrings.       */
 	    if (!(fl & SUB_LONG)) {
 		set_pat_start(p, l);
-		if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) {
-		    *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+		if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
+		    !--n) {
+		    *sp = get_match_ret(&imd, umltot, umltot);
+		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3001,13 +3041,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    nmatches = 0;
 	    tmatch = NULL;
 	    mb_charinit();
-	    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+	    for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
 		    nmatches++;
 		    tmatch = t;
 		}
-		umlen -= iincchar(&t);
+		umlen -= iincchar(&t, send - t);
 	    }
 	    if (nmatches) {
 		char *mpos;
@@ -3017,14 +3057,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		     */
 		    n = nmatches - n;
 		    mb_charinit();
-		    for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
+		    for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
 			set_pat_start(p, t-s);
-			if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) &&
+			if (pattrylen(p, t, umlen, 0, &patstralloc, ioff) &&
 			    !n--) {
 			    tmatch = t;
 			    break;
 			}
-			umlen -= iincchar(&t);
+			umlen -= iincchar(&t, send - t);
 		    }
 		}
 		mpos = tmatch + patmatchlen();
@@ -3032,29 +3072,31 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
 		    for (t = tmatch, umlen = 0; t < mpos; ) {
 			set_pat_end(p, *t);
-			if (pattrylen(p, tmatch, t - tmatch, umlen,
-				      NULL, ioff)) {
+			if (pattrylen(p, tmatch, umlen, 0,
+				      &patstralloc, ioff)) {
 			    mpos = tmatch + patmatchlen();
 			    break;
 			}
-			umlen += iincchar(&t);
+			umlen += iincchar(&t, mpos - t);
 		    }
 		}
-		*sp = get_match_ret(*sp, tmatch-s, mpos-s, fl,
-				    replstr, NULL);
+		*sp = get_match_ret(&imd, tmatch-s, mpos-s);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    set_pat_start(p, l);
-	    if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) &&
+	    if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
+					     &patstralloc, umltot) &&
 		!--n) {
-		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+		*sp = get_match_ret(&imd, umltot, umltot);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
 	}
     }
 
-    if (repllist && nonempty(repllist)) {
+    if (imd.repllist && nonempty(imd.repllist)) {
 	/* Put all the bits of a global search and replace together. */
 	LinkNode nd;
 	Repldata rd;
@@ -3062,10 +3104,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	char *ptr, *start;
 	int i;
 
+	/*
+	 * Use metafied string again.
+	 * Results from get_match_ret in repllist are all metafied.
+	 */
+	s = *sp;
 	if (!(fl & SUB_LIST)) {
 	    lleft = 0;		/* size of returned string */
-	    i = 0;			/* start of last chunk we got from *sp */
-	    for (nd = firstnode(repllist); nd; incnode(nd)) {
+	    i = 0;	       /* start of last chunk we got from *sp */
+	    for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
 		rd = (Repldata) getdata(nd);
 		lleft += rd->b - i; /* previous chunk of *sp */
 		lleft += strlen(rd->replstr);	/* the replaced bit */
@@ -3074,7 +3121,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    lleft += l - i;	/* final chunk from *sp */
 	    start = t = zhalloc(lleft+1);
 	    i = 0;
-	    for (nd = firstnode(repllist); nd; incnode(nd)) {
+	    for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
 		rd = (Repldata) getdata(nd);
 		memcpy(t, s + i, rd->b - i);
 		t += rd->b - i;
@@ -3087,13 +3134,19 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    start[lleft] = '\0';
 	    *sp = (char *)start;
 	}
+	patfreestr(&patstralloc);
 	return 1;
     }
-    if (fl & SUB_LIST)		/* safety: don't think this can happen */
+    if (fl & SUB_LIST) {	/* safety: don't think this can happen */
+	patfreestr(&patstralloc);
 	return 0;
+    }
 
     /* munge the whole string: no match, so no replstr */
-    *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
+    imd.replstr = NULL;
+    imd.repllist = NULL;
+    *sp = get_match_ret(&imd, 0, 0);
+    patfreestr(&patstralloc);
     return (fl & SUB_RETFAIL) ? 0 : 1;
 }
 
@@ -3111,7 +3164,7 @@ static int
 igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	  LinkList *repllistp)
 {
-    char *s = *sp, *t;
+    char *s = *sp, *t, *send;
     /*
      * Note that ioff and uml count characters in the character
      * set (Meta's are not included), while l counts characters in the
@@ -3119,36 +3172,48 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
      * lengths.
      */
     int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
-    /*
-     * List of bits of matches to concatenate with replacement string.
-     * The data is a struct repldata.  It is not used in cases like
-     * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
-     * is anchored.  It goes on the heap.
-     */
-    LinkList repllist = NULL;
+    struct patstralloc patstralloc;
+    struct imatchdata imd;
+
+    (void)patallocstr(p, s, l, uml, 1, &patstralloc);
+    s = patstralloc.alloced;
+    DPUTS(!s, "forced patallocstr failed");
+    send = s + uml;
+
+    imd.mstr = *sp;
+    imd.mlen = l;
+    imd.ustr = s;
+    imd.ulen = uml;
+    imd.flags = fl;
+    imd.replstr = replstr;
+    imd.repllist = NULL;
 
     /* perform must-match test for complex closures */
     if (p->mustoff)
     {
-	/*
-	 * Yuk.  Probably we should rewrite this whole function to
-	 * use an unmetafied test string.
-	 *
-	 * Use META_HEAPDUP because we need a terminating NULL.
-	 */
-	char *muststr = metafy((char *)p + p->mustoff,
-			       p->patmlen, META_HEAPDUP);
+	char *muststr = (char *)p + p->mustoff;
 
-	if (!strstr(s, muststr))
-	    matched = 0;
+	matched = 0;
+	if (p->patmlen <= uml)
+	{
+	    for (t = s; t <= send - p->patmlen; t++)
+	    {
+		if (!memcmp(muststr, t, p->patmlen)) {
+		    matched = 1;
+		    break;
+		}
+	    }
+	}
     }
 
     /* in case we used the prog before... */
     p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
 
     if (fl & SUB_ALL) {
-	int i = matched && pattry(p, s);
-	*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
+	int i = matched && pattrylen(p, s, uml, 0, &patstralloc, 0);
+	if (!i)
+	    imd.replstr = NULL;
+	*sp = get_match_ret(&imd, 0, i ? l : 0);
 	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
 	    return 0;
 	return 1;
@@ -3161,23 +3226,25 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	     * Largest/smallest possible match at head of string.
 	     * First get the longest match...
 	     */
-	    if (pattry(p, s)) {
+	    if (pattrylen(p, s, uml, 0, &patstralloc, 0)) {
 		/* patmatchlen returns metafied length, as we need */
 	        int mlen = patmatchlen();
 		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
+		    send = s + mlen;
 		    /*
 		     * ... now we know whether it's worth looking for the
 		     * shortest, which we do by brute force.
 		     */
 		    for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
 			set_pat_end(p, *t);
-			if (pattrylen(p, s, t - s, umlen, NULL, 0)) {
+			if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
 			    mlen = patmatchlen();
 			    break;
 			}
 		    }
 		}
-		*sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
+		*sp = get_match_ret(&imd, 0, mlen);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3186,17 +3253,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    /* Smallest possible match at tail of string:  *
 	     * move back down string until we get a match. *
 	     * There's no optimization here.               */
-	    for (ioff = uml, t = s + l, umlen = 0; t >= s;
+	    for (ioff = uml, t = send, umlen = 0; t >= s;
 		 t--, ioff--, umlen++) {
-		if (t > s && t[-1] == Meta)
-		    t--;
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
-		    *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL);
+		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
+		    *sp = get_match_ret(&imd, t - s, uml);
 		    return 1;
 		}
-		if (t > s+1 && t[-2] == Meta)
-		    t--;
 	    }
 	    break;
 
@@ -3204,61 +3267,59 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    /* Largest possible match at tail of string:       *
 	     * move forward along string until we get a match. *
 	     * Again there's no optimisation.                  */
-	    for (ioff = 0, t = s, umlen = uml; t < s + l;
-		 ioff++, METAINC(t), umlen--) {
+	    for (ioff = 0, t = s, umlen = uml; t < send;
+		 ioff++, t++, umlen--) {
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
-		    *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
+		if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
+		    *sp = get_match_ret(&imd, t-s, uml);
 		    return 1;
 		}
-		if (*t == Meta)
-		    t++;
 	    }
 	    break;
 
 	case SUB_SUBSTR:
 	    /* Smallest at start, but matching substrings. */
 	    set_pat_start(p, l);
-	    if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
-		*sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
+	    if (!(fl & SUB_GLOBAL) &&
+		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+		*sp = get_match_ret(&imd, 0, 0);
 		return 1;
 	    } /* fall through */
 	case (SUB_SUBSTR|SUB_LONG):
 	    /* longest or smallest at start with substrings */
 	    t = s;
 	    if (fl & SUB_GLOBAL) {
-		repllist = newlinklist();
+		imd.repllist = newlinklist();
 		if (repllistp)
-		    *repllistp = repllist;
+		    *repllistp = imd.repllist;
 	    }
 	    ioff = 0;		/* offset into string */
 	    umlen = uml;
 	    do {
 		/* loop over all matches for global substitution */
 		matched = 0;
-		for (; t < s + l; METAINC(t), ioff++, umlen--) {
+		for (; t < send; t++, ioff++, umlen--) {
 		    /* Find the longest match from this position. */
 		    set_pat_start(p, t-s);
-		    if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) {
+		    if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
 			char *mpos = t + patmatchlen();
 			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
 			    char *ptr;
 			    int umlen2;
 			    for (ptr = t, umlen2 = 0; ptr < mpos;
-				 METAINC(ptr), umlen2++) {
+				 ptr++, umlen2++) {
 				set_pat_end(p, *ptr);
 				if (pattrylen(p, t, ptr - t, umlen2,
-					      NULL, ioff)) {
+					      &patstralloc, ioff)) {
 				    mpos = t + patmatchlen();
 				    break;
 				}
 			    }
 			}
 			if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
-			    *sp = get_match_ret(*sp, t-s, mpos-s, fl,
-						replstr, repllist);
+			    *sp = get_match_ret(&imd, t-s, mpos-s);
 			    if (mpos == t)
-				METAINC(mpos);
+				mpos++;
 			}
 			if (!(fl & SUB_GLOBAL)) {
 			    if (n) {
@@ -3278,13 +3339,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 			 * which is already marked for replacement.
 			 */
 			matched = 1;
-			for ( ; t < mpos; t++, ioff++, umlen--)
-			    if (*t == Meta)
-				t++;
+			while (t < mpos) {
+			    ioff++;
+			    umlen--;
+			    t++;
+			}
 			break;
 		    }
-		    if (*t == Meta)
-			t++;
 		}
 	    } while (matched);
 	    /*
@@ -3294,8 +3355,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	     */
 	    set_pat_start(p, l);
 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
-		pattry(p, s + l) && !--n) {
-		*sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
+		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
+		*sp = get_match_ret(&imd, 0, 0);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3305,47 +3367,50 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    /* Longest/shortest at end, matching substrings.       */
 	    if (!(fl & SUB_LONG)) {
 		set_pat_start(p, l);
-		if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) {
-		    *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+		if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
+		    *sp = get_match_ret(&imd, uml, uml);
+		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
-	    for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s;
+	    for (ioff = uml - 1, t = send - 1, umlen = 1; t >= s;
 		 t--, ioff--, umlen++) {
-		if (t > s && t[-1] == Meta)
-		    t--;
 		set_pat_start(p, t-s);
-		if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) {
+		if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff) &&
+		    !--n) {
 		    /* Found the longest match */
 		    char *mpos = t + patmatchlen();
 		    if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
 			char *ptr;
 			int umlen2;
 			for (ptr = t, umlen2 = 0; ptr < mpos;
-			     METAINC(ptr), umlen2++) {
+			     ptr++, umlen2++) {
 			    set_pat_end(p, *ptr);
-			    if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) {
+			    if (pattrylen(p, t, umlen2, 0, &patstralloc,
+					  ioff)) {
 				mpos = t + patmatchlen();
 				break;
 			    }
 			}
 		    }
-		    *sp = get_match_ret(*sp, t-s, mpos-s, fl,
-					replstr, NULL);
+		    *sp = get_match_ret(&imd, t-s, mpos-s);
+		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
 	    set_pat_start(p, l);
-	    if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) &&
+	    if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
+					     &patstralloc, uml) &&
 		!--n) {
-		*sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
+		*sp = get_match_ret(&imd, uml, uml);
+		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
 	}
     }
 
-    if (repllist && nonempty(repllist)) {
+    if (imd.repllist && nonempty(imd.repllist)) {
 	/* Put all the bits of a global search and replace together. */
 	LinkNode nd;
 	Repldata rd;
@@ -3353,8 +3418,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	char *ptr, *start;
 	int i;
 
+	/*
+	 * Use metafied string again.
+	 * Results from get_match_ret in repllist are all metafied.
+	 */
+	s = *sp;
 	i = 0;			/* start of last chunk we got from *sp */
-	for (nd = firstnode(repllist); nd; incnode(nd)) {
+	for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
 	    rd = (Repldata) getdata(nd);
 	    lleft += rd->b - i; /* previous chunk of *sp */
 	    lleft += strlen(rd->replstr);	/* the replaced bit */
@@ -3363,7 +3433,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	lleft += l - i;	/* final chunk from *sp */
 	start = t = zhalloc(lleft+1);
 	i = 0;
-	for (nd = firstnode(repllist); nd; incnode(nd)) {
+	for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
 	    rd = (Repldata) getdata(nd);
 	    memcpy(t, s + i, rd->b - i);
 	    t += rd->b - i;
@@ -3375,11 +3445,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	memcpy(t, s + i, l - i);
 	start[lleft] = '\0';
 	*sp = (char *)start;
+	patfreestr(&patstralloc);
 	return 1;
     }
 
     /* munge the whole string: no match, so no replstr */
-    *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
+    imd.replstr = NULL;
+    imd.repllist = NULL;
+    *sp = get_match_ret(&imd, 0, 0);
+    patfreestr(&patstralloc);
     return 1;
 }
 
diff --git a/Src/pattern.c b/Src/pattern.c
index 03ba37d..8de372c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2204,7 +2204,10 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen,
  * the pattern module) at which we are trying to match.
  * This is added in to the positions recorded in patbeginp and patendp
  * when we are looking for substrings.  Currently this only happens
- * in the parameter substitution code.
+ * in the parameter substitution code.  It refers to a real character
+ * offset, i.e. is already in the form ready for presentation to the
+ * general public --- this is necessary as we don't have the
+ * information to convert it down here.
  *
  * Note this is a character offset, i.e. a single possibly metafied and
  * possibly multibyte character counts as 1.
@@ -2292,7 +2295,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 	     */
 	    if (!patstralloc->progstrunmeta)
 	    {
-		patstralloc->progstrunmeta = dupstring(progstr);
+		patstralloc->progstrunmeta =
+		    dupstrpfx(progstr, (int)prog->patmlen);
 		unmetafy(patstralloc->progstrunmeta,
 			 &patstralloc->progstrunmetalen);
 	    }
@@ -2346,7 +2350,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 		 * In the orignal structure, but it might be unmetafied
 		 * for use with an unmetafied test string.
 		 */
-		patinlen = (int)prog->patmlen;
+		patinlen = pstrlen;
 		/* if matching files, must update globbing flags */
 		patglobflags = prog->globend;
 
@@ -2360,7 +2364,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 			 * Unmetafied: pstrlen contains unmetafied
 			 * length in bytes.
 			 */
-			str = metafy(patinstart, pstrlen, META_ALLOC);
+			str = metafy(patinstart, pstrlen, META_DUP);
 			mlen = CHARSUB(patinstart, patinstart + pstrlen);
 		    } else {
 			str = ztrduppfx(patinstart, patinlen);
@@ -2454,8 +2458,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 	    /*
 	     * Optimization: if we didn't find any Meta characters
 	     * to begin with, we don't need to look for them now.
-	     * Only do this if we did the unmetfication internally,
-	     * since otherwise it's too hard to work out.
+	     *
+	     * For patstralloc pased in, we want the unmetafied length.
 	     */
 	    if (patstralloc == &patstralloc_struct &&
 		patstralloc->unmetalen != origlen) {
@@ -2588,7 +2592,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 
 /*
  * Return length of previous succesful match.  This is
- * in metafied bytes, i.e. includes a count of Meta characters.
+ * in metafied bytes, i.e. includes a count of Meta characters,
+ * unless the match was done on an unmetafied string using
+ * a patstralloc stuct, in which case it, too is unmetafed.
  * Unusual and futile attempt at modular encapsulation.
  */
 
diff --git a/Src/zsh.h b/Src/zsh.h
index 32f2e0c..15fa5e4 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -480,6 +480,7 @@ typedef struct heap      *Heap;
 typedef struct heapstack *Heapstack;
 typedef struct histent   *Histent;
 typedef struct hookdef   *Hookdef;
+typedef struct imatchdata *Imatchdata;
 typedef struct jobfile   *Jobfile;
 typedef struct job       *Job;
 typedef struct linkedmod *Linkedmod;
@@ -1593,6 +1594,31 @@ typedef struct zpc_disables_save *Zpc_disables_save;
 /* Range: token followed by the (possibly multibyte) start and end */
 #define PP_RANGE  21
 
+/*
+ * Argument to get_match_ret() in glob.c
+ */
+struct imatchdata {
+    /* Metafied trial string */
+    char *mstr;
+    /* Its length */
+    int mlen;
+    /* Unmetafied string */
+    char *ustr;
+    /* Its length */
+    int ulen;
+    /* Flags (SUB_*) */
+    int flags;
+    /* Replacement string (metafied) */
+    char *replstr;
+    /*
+     * List of bits of matches to concatenate with replacement string.
+     * The data is a struct repldata.  It is not used in cases like
+     * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
+     * is anchored.  It goes on the heap.
+     */
+    LinkList repllist;
+};
+
 /* Globbing flags: lower 8 bits gives approx count */
 #define GF_LCMATCHUC	0x0100
 #define GF_IGNCASE	0x0200



Messages sorted by: Reverse Date, Date, Thread, Author