Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: PATH: autoload with explicit path



On Wed, 11 Jan 2017 20:51:22 +0000
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx> wrote:
> On Wed, 11 Jan 2017 11:42:32 +0000
> Peter Stephenson <p.stephenson@xxxxxxxxxxx> wrote:
> > The main disadvantage is each function record now contains the name of
> > the directory, so this takes extra memory --- it looks like a couple of
> > dozen k in the case of my function directory which is quite full.  This
> > could be optimised with indirection but I'm not sure it's worth the work.
> 
> This seemed like too good an idea not to follow up.  Now "autoload
> /blah/blah/*" is as efficient as the old way of doing it, too.

Update based on the previous fix, also fix a memory leak in the cache
and add a test to check what happens when bits of the cache are
deleted.  I think this is sane now.

pws

diff --git a/Src/builtin.c b/Src/builtin.c
index 716ddd4..a683032 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -2958,13 +2958,13 @@ check_autoload(Shfunc shf, char *name, Options ops, int func)
 	    }
 	}
 	if (getfpfunc(shf->node.nam, NULL, &dir_path, NULL, 1)) {
-	    zsfree(shf->filename);
+	    dircache_set(&shf->filename, NULL);
 	    if (*dir_path != '/') {
 		dir_path = zhtricat(metafy(zgetcwd(), -1, META_HEAPDUP),
 				    "/", dir_path);
 		dir_path = xsymlink(dir_path, 1);
 	    }
-	    shf->filename = ztrdup(dir_path);
+	    dircache_set(&shf->filename, dir_path);
 	    shf->node.flags |= PM_LOADDIR;
 	    return 0;
 	}
@@ -3029,8 +3029,8 @@ add_autoload_function(Shfunc shf, char *funcname)
 	    *nam++ = '\0';
 	    dir = funcname;
 	}
-	zsfree(shf->filename);
-	shf->filename = ztrdup(dir);
+	dircache_set(&shf->filename, NULL);
+	dircache_set(&shf->filename, dir);
 	shf->node.flags |= PM_LOADDIR;
 	shfunctab->addnode(shfunctab, ztrdup(nam), shf);
     } else {
@@ -3280,8 +3280,8 @@ bin_functions(char *name, char **argv, Options ops, int func)
 		shfunctab->addnode(shfunctab, ztrdup(funcname), shf);
 	    }
 	    if (*argv) {
-		zsfree(shf->filename);
-		shf->filename = ztrdup(*argv);
+		dircache_set(&shf->filename, NULL);
+		dircache_set(&shf->filename, *argv);
 		on |= PM_LOADDIR;
 	    }
 	    shf->node.flags = on;
diff --git a/Src/exec.c b/Src/exec.c
index 68c455b..7a5d2bd 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -4902,6 +4902,7 @@ execfuncdef(Estate state, Eprog redir_prog)
 	shf = (Shfunc) zalloc(sizeof(*shf));
 	shf->funcdef = prog;
 	shf->node.flags = 0;
+	/* No dircache here, not a directory */
 	shf->filename = ztrdup(scriptfilename);
 	shf->lineno = lineno;
 	/*
@@ -4934,7 +4935,7 @@ execfuncdef(Estate state, Eprog redir_prog)
 		    freeeprog(shf->funcdef);
 		    if (shf->redir) /* shouldn't be */
 			freeeprog(shf->redir);
-		    zsfree(shf->filename);
+		    dircache_set(&shf->filename, NULL);
 		    zfree(shf, sizeof(*shf));
 		    state->pc = end;
 		    return 1;
@@ -4965,7 +4966,7 @@ execfuncdef(Estate state, Eprog redir_prog)
 	    freeeprog(shf->funcdef);
 	    if (shf->redir) /* shouldn't be */
 		freeeprog(shf->redir);
-	    zsfree(shf->filename);
+	    dircache_set(&shf->filename, NULL);
 	    zfree(shf, sizeof(*shf));
 	    break;
 	} else {
@@ -4974,7 +4975,7 @@ execfuncdef(Estate state, Eprog redir_prog)
 		(signum = getsignum(s + 4)) != -1) {
 		if (settrap(signum, NULL, ZSIG_FUNC)) {
 		    freeeprog(shf->funcdef);
-		    zsfree(shf->filename);
+		    dircache_set(&shf->filename, NULL);
 		    zfree(shf, sizeof(*shf));
 		    state->pc = end;
 		    return 1;
@@ -5205,7 +5206,8 @@ loadautofn(Shfunc shf, int fksh, int autol, int current_fpath)
 	    else
 		shf->funcdef = dupeprog(prog, 0);
 	    shf->node.flags &= ~(PM_UNDEFINED|PM_LOADDIR);
-	    zsfree(shf->filename);
+	    dircache_set(&shf->filename, NULL);
+	    /* Full filename, don't use dircache */
 	    shf->filename = fname;
 	} else {
 	    VARARR(char, n, strlen(shf->node.nam) + 1);
@@ -5229,7 +5231,8 @@ loadautofn(Shfunc shf, int fksh, int autol, int current_fpath)
 	else
 	    shf->funcdef = dupeprog(stripkshdef(prog, shf->node.nam), 0);
 	shf->node.flags &= ~(PM_UNDEFINED|PM_LOADDIR);
-	zsfree(shf->filename);
+	dircache_set(&shf->filename, NULL);
+	/* Full filename, don't use dircache */
 	shf->filename = fname;
     }
     popheap();
@@ -5620,6 +5623,8 @@ runshfunc(Eprog prog, FuncWrap wrap, char *name)
  * Search fpath for an undefined function.  Finds the file, and returns the
  * list of its contents.
  *
+ * If test is 0, load the function; *fname is set to zalloc'ed location.
+ *
  * If test_only is 1, don't load function, just test for it:
  * - Non-null return means function was found
  * - *fname points to path at which found (not duplicated)
diff --git a/Src/hashtable.c b/Src/hashtable.c
index 2a8b585..22ebe34 100644
--- a/Src/hashtable.c
+++ b/Src/hashtable.c
@@ -889,7 +889,7 @@ freeshfuncnode(HashNode hn)
 	freeeprog(shf->funcdef);
     if (shf->redir)
 	freeeprog(shf->redir);
-    zsfree(shf->filename);
+    dircache_set(&shf->filename, NULL);
     if (shf->sticky) {
 	if (shf->sticky->n_on_opts)
 	    zfree(shf->sticky->on_opts,
@@ -1442,3 +1442,140 @@ freehistdata(Histent he, int unlink)
 	}
     }
 }
+
+
+/***********************************************************************
+ * Directory name cache mechanism
+ *
+ * The idea of this is that there are various shell structures,
+ * notably functions, that record the directories with which they
+ * are associated.  Rather than store the full string each time,
+ * we store a pointer to the same location and count the references.
+ * This is optimised so that retrieval is quick at the expense of
+ * searching the list when setting up the structure, which is a much
+ * rarer operation.
+ *
+ * There is nothing special about the fact that the strings are
+ * directories, except for the assumptions for efficiency that many
+ * structures will point to the same one, and that there are not too
+ * many different directories associated with the shell.
+ **********************************************************************/
+
+struct dircache_entry
+{
+    /* Name of directory in cache */
+    char *name;
+    /* Number of references to it */
+    int refs;
+};
+
+/*
+ * dircache is the cache, of length dircache_size.
+ * dircache_lastentry is the last entry used, an optimisation
+ * for multiple references to the same directory, e.g
+ * "autoload /blah/blah/\*".
+ */
+struct dircache_entry *dircache, *dircache_lastentry;
+int dircache_size;
+
+/*
+ * Set *name to point to a cached version of value.
+ * value is copied so may come from any source.
+ *
+ * If value is NULL, look for the existing value of *name (safe if this
+ * too is NULL) and remove a reference to it from the cache. If it's
+ * not found in the cache, it's assumed to be an allocated string and
+ * freed --- this currently occurs for a shell function that's been
+ * loaded as the filename is now a full path, not just a directory,
+ * though we may one day optimise this to a cached directory plus a
+ * name, too.  Note --- the function does *not* otherwise check
+ * if *name points to something already cached, so this is
+ * necessary any time *name may already be in the cache.
+ */
+
+/**/
+mod_export void
+dircache_set(char **name, char *value)
+{
+    struct dircache_entry *dcptr, *dcnew;
+
+    if (!value) {
+	if (!*name)
+	    return;
+	if (!dircache_size) {
+	    zsfree(*name);
+	    *name = NULL;
+	    return;
+	}
+
+	for (dcptr = dircache; dcptr < dircache + dircache_size; dcptr++)
+	{
+	    /* Must be a pointer much, not a string match */
+	    if (*name == dcptr->name)
+	    {
+		--dcptr->refs;
+		if (!dcptr->refs) {
+		    ptrdiff_t ind = dcptr - dircache;
+		    zsfree(dcptr->name);
+		    --dircache_size;
+
+		    if (!dircache_size) {
+			zfree(dircache, sizeof(*dircache));
+			dircache = NULL;
+			dircache_lastentry = NULL;
+			return;
+		    }
+		    dcnew = (struct dircache_entry *)
+			zalloc(dircache_size * sizeof(*dcnew));
+		    if (ind)
+			memcpy(dcnew, dircache, ind * sizeof(*dcnew));
+		    if (ind < dircache_size)
+			memcpy(dcnew + ind, dcptr + 1,
+			       (dircache_size - ind) * sizeof(*dcnew));
+		    zfree(dircache, (dircache_size+1)*sizeof(*dcnew));
+		    dircache = dcnew;
+		    dircache_lastentry = NULL;
+		}
+		*name = NULL;
+		return;
+	    }
+	}
+	zsfree(*name);
+	*name = NULL;
+    } else {
+	/*
+	 * We'll maintain the cache at exactly the right size rather
+	 * than overallocating.  The rationale here is that typically
+	 * we'll get a lot of functions in a small number of directories
+	 * so the complexity overhead of maintaining a separate count
+	 * isn't really matched by the efficiency gain.
+ 	 */
+	if (dircache_lastentry &&
+	    !strcmp(value, dircache_lastentry->name)) {
+	    *name = dircache_lastentry->name;
+	    ++dircache_lastentry->refs;
+	    return;
+	} else if (!dircache_size) {
+	    dircache_size = 1;
+	    dcptr = dircache =
+		(struct dircache_entry *)zalloc(sizeof(*dircache));
+	} else {
+	    for (dcptr = dircache; dcptr < dircache + dircache_size; dcptr++)
+	    {
+		if (!strcmp(value, dcptr->name)) {
+		    *name = dcptr->name;
+		    ++dcptr->refs;
+		    return;
+		}
+	    }
+	    ++dircache_size;
+	    dircache = (struct dircache_entry *)
+		zrealloc(dircache, sizeof(*dircache) * dircache_size);
+	    dcptr = dircache + dircache_size - 1;
+	}
+	dcptr->name = ztrdup(value);
+	*name = dcptr->name;
+	dcptr->refs = 1;
+	dircache_lastentry = dcptr;
+    }
+}
diff --git a/Src/signals.c b/Src/signals.c
index 9e05add..2de5743 100644
--- a/Src/signals.c
+++ b/Src/signals.c
@@ -811,6 +811,7 @@ dosavetrap(int sig, int level)
 	    newshf->node.nam = ztrdup(shf->node.nam);
 	    newshf->node.flags = shf->node.flags;
 	    newshf->funcdef = dupeprog(shf->funcdef, 0);
+	    /* Assume this is a real filename, don't use dircache */
 	    newshf->filename = ztrdup(shf->filename);
 	    if (shf->sticky) {
 		newshf->sticky = sticky_emulation_dup(shf->sticky, 0);
diff --git a/Test/C04funcdef.ztst b/Test/C04funcdef.ztst
index 7100280..370394b 100644
--- a/Test/C04funcdef.ztst
+++ b/Test/C04funcdef.ztst
@@ -429,6 +429,45 @@
 0: autoload -X interaction with absolute filename used for source location
 >Function was loaded correctly.
 
+  (
+    fpath=()
+    mkdir extra2
+    for f in fun2a fun2b; do
+      print "print $f" >extra2/$f
+    done
+    repeat 3; do
+      autoload $PWD/extra2/fun2{a,b} $PWD/extra/spec
+      fun2a
+      fun2b
+      spec
+      unfunction fun2a fun2b spec
+      autoload $PWD/extra2/fun2{a,b} $PWD/extra/spec
+      spec
+      fun2b
+      fun2a
+      unfunction fun2a fun2b spec
+    done
+  )
+0: Exercise the directory name cache for autoloads
+>fun2a
+>fun2b
+>I have been loaded by explicit path.
+>I have been loaded by explicit path.
+>fun2b
+>fun2a
+>fun2a
+>fun2b
+>I have been loaded by explicit path.
+>I have been loaded by explicit path.
+>fun2b
+>fun2a
+>fun2a
+>fun2b
+>I have been loaded by explicit path.
+>I have been loaded by explicit path.
+>fun2b
+>fun2a
+
 %clean
 
  rm -f file.in file.out



Messages sorted by: Reverse Date, Date, Thread, Author