Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Substitution ${...///} slows down when certain UTF character occurs



On Tue, 29 Sep 2015 12:23:56 -0700
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> On Sep 29,  7:37pm, Peter Stephenson wrote:
> }
> } This uses the new interface.  I haven't done any testing apart from the
> } normal test suite.
> 
> I ran my looping version of Sebastian's test program and got comparable
> (fast!) times for all of his samples.

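OK, that suggests we're now allocating memory infrequently enough (as
intended) that it's safe to put it on the heap, which simplifies things
further: patallocstr() now uses zhalloc(), so patfreestr() and all the
calls to it go away.  I've also removed one other unnecessary chunk,
the second imeta()-counting loop in get_match_ret().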

pws

diff --git a/Src/glob.c b/Src/glob.c
index d998663..24e60d0 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -2478,9 +2478,6 @@ get_match_ret(Imatchdata imd, int b, int e)
 	if (imeta(*p))
 	    add++;
     e += add;
-    for (; p < imd->ustr + imd->ulen; p++)
-	if (imeta(*p))
-	    add++;
 
     /* Everything now refers to metafied lengths. */
     if (replstr || (fl & SUB_LIST)) {
@@ -2808,7 +2805,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    imd.replstr = NULL;
 	}
 	*sp = get_match_ret(&imd, 0, umltot);
-	patfreestr(&patstralloc);
 	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
 	    return 0;
 	return 1;
@@ -2856,7 +2852,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		    }
 		}
 		*sp = get_match_ret(&imd, 0, mlen);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2884,13 +2879,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    }
 	    if (tmatch) {
 		*sp = get_match_ret(&imd, tmatch - s, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
 					       &patstralloc, ioff)) {
 		*sp = get_match_ret(&imd, umltot, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2904,7 +2897,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		set_pat_start(p, t-s);
 		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
 		    *sp = get_match_ret(&imd, t-s, umltot);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 		if (fl & SUB_START)
@@ -2914,7 +2906,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
 					       &patstralloc, ioff)) {
 		*sp = get_match_ret(&imd, umltot, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -2926,7 +2917,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		pattrylen(p, send, 0, 0, &patstralloc, 0) &&
 		!--n) {
 		*sp = get_match_ret(&imd, 0, 0);
-		patfreestr(&patstralloc);
 		return 1;
 	    } /* fall through */
 	case (SUB_SUBSTR|SUB_LONG):
@@ -2984,7 +2974,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 				umlen -= iincchar(&t, send - t);
 				continue;
 			    } else {
-				patfreestr(&patstralloc);
 				return 1;
 			    }
 			}
@@ -3011,7 +3000,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
 		*sp = get_match_ret(&imd, 0, 0);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3024,7 +3012,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
 		    !--n) {
 		    *sp = get_match_ret(&imd, umltot, umltot);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3081,7 +3068,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		    }
 		}
 		*sp = get_match_ret(&imd, tmatch-s, mpos-s);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    set_pat_start(p, l);
@@ -3089,7 +3075,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 					     &patstralloc, umltot) &&
 		!--n) {
 		*sp = get_match_ret(&imd, umltot, umltot);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3134,11 +3119,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    start[lleft] = '\0';
 	    *sp = (char *)start;
 	}
-	patfreestr(&patstralloc);
 	return 1;
     }
     if (fl & SUB_LIST) {	/* safety: don't think this can happen */
-	patfreestr(&patstralloc);
 	return 0;
     }
 
@@ -3146,7 +3129,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
     imd.replstr = NULL;
     imd.repllist = NULL;
     *sp = get_match_ret(&imd, 0, 0);
-    patfreestr(&patstralloc);
     return (fl & SUB_RETFAIL) ? 0 : 1;
 }
 
@@ -3244,7 +3226,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		    }
 		}
 		*sp = get_match_ret(&imd, 0, mlen);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3357,7 +3338,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
 		*sp = get_match_ret(&imd, 0, 0);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3369,7 +3349,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 		set_pat_start(p, l);
 		if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
 		    *sp = get_match_ret(&imd, uml, uml);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3394,7 +3373,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 			}
 		    }
 		    *sp = get_match_ret(&imd, t-s, mpos-s);
-		    patfreestr(&patstralloc);
 		    return 1;
 		}
 	    }
@@ -3403,7 +3381,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 					     &patstralloc, uml) &&
 		!--n) {
 		*sp = get_match_ret(&imd, uml, uml);
-		patfreestr(&patstralloc);
 		return 1;
 	    }
 	    break;
@@ -3445,7 +3422,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
 	memcpy(t, s + i, l - i);
 	start[lleft] = '\0';
 	*sp = (char *)start;
-	patfreestr(&patstralloc);
 	return 1;
     }
 
@@ -3453,7 +3429,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
     imd.replstr = NULL;
     imd.repllist = NULL;
     *sp = get_match_ret(&imd, 0, 0);
-    patfreestr(&patstralloc);
     return 1;
 }
 
diff --git a/Src/pattern.c b/Src/pattern.c
index 8de372c..68a3409 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -2028,8 +2028,8 @@ pattrystart(void)
  *
  * Unmetafy a trial string for use in pattern matching, if needed.
  *
- * If it is needed, returns a zalloc()'d string; if not needed, returns
- * NULL.
+ * If it is needed, returns a heap allocated string; if not needed,
+ * returns NULL.
  *
  * prog is the pattern to be executed.
  * string is the metafied trial string.
@@ -2046,7 +2046,7 @@ pattrystart(void)
  *  unmetalenp is the umetafied length of a path segment preceeding
  *    the trial string needed for file mananagement; it is calculated as
  *    needed so does not need to be initialised.
- *  alloced is the memory allocated --- same as return value from
+ *  alloced is the memory allocated on the heap --- same as return value from
  *    function.
  */
 /**/
@@ -2097,7 +2097,7 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
 	int i, icopy, ncopy;
 
 	dst = patstralloc->alloced =
-	    zalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
+	    zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
 
 	if (needfullpath) {
 	    /* loop twice, copy path buffer first time */
@@ -2134,20 +2134,6 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
 
 
 /*
- * Free memory allocated by patallocstr().
- */
-
-/**/
-mod_export
-void patfreestr(Patstralloc patstralloc)
-{
-    if (patstralloc->alloced)
-	zfree(patstralloc->alloced,
-	      patstralloc->unmetalen + patstralloc->unmetalenp);
-}
-
-
-/*
  * Test prog against null-terminated, metafied string.
  */
 
@@ -2189,8 +2175,9 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen,
  * done if there is no path prefix (pathpos == 0) as otherwise the path
  * buffer and unmetafied string may not match.  To do this,
  * patallocstr() is callled (use force = 1 to ensure it is alway
- * unmetafied); paststralloc points to existing storage.  When all
- * pattern matching is done, patfreestr() is called.
+ * unmetafied); paststralloc points to existing storage. Memory is
+ * on the heap.
+ *
  * patstralloc->alloced and patstralloc->unmetalen contain the
  * unmetafied string and its length.  In that case, the rules for the
  * earlier arguments change:
@@ -2387,8 +2374,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 	    }
 	}
 
-	if (patstralloc == &patstralloc_struct)
-	    patfreestr(patstralloc);
 	return ret;
     } else {
 	int q = queue_signal_level();
@@ -2425,8 +2410,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 	    }
 	}
 	if (!ret) {
-	    if (patstralloc == &patstralloc_struct)
-		patfreestr(patstralloc);
 	    return 0;
 	}
 
@@ -2583,9 +2566,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
 
 	restore_queue_signals(q);
 
-	if (patstralloc == &patstralloc_struct)
-	    patfreestr(patstralloc);
-
 	return ret;
     }
 }



Messages sorted by: Reverse Date, Date, Thread, Author