Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: Memory leak when working with undefined associative array keys & problems with unset



On Sun, 17 Sep 2017 16:15:14 -0700
Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
> On Sep 16,  8:57pm, Anssi Palin wrote:
> }
> } Zsh appears to permanently allocate some memory when just checking
> } if a key is defined in an associative array.
> 
> Hmm.
> 
> This has to do with order of operations.  In order to test whether a[0]
> is (not) set, the calling code needs a "struct pm" object for which it
> can attempt to retrieve a value and/or check the PM_UNSET flag.  For a
> hash-typed parameter, there's nowhere to temporarily store such an
> object except in the hashtable itself.  The same level of evaluation
> that tests ${+...} also handles ${...::=...} which also needs that
> struct to store into, so we can't NOT allocate it, and because all
> parameter ops are by value rather than by reference we can't wait until
> after we've assigned to it to add it to the hash (the knowledge that
> "a[0]" is part of the hash "a" or even that the key is "0", is lost
> by the time we're ready to do the assignment -- we have only a handle
> to the object that resides at $a[0]).

Can we really not do something like the patch below?

There may well be other cases to check, as the logic is complicated,
but I think this is the obvious case.  If it breaks anything, then
something is missing from the parameter tests.

> } The second issue I have pertains to special characters in associative
> } array keys when unsetting them individually:
> }
> } $ key='hello * [ world'
> } $ typeset -A a=("$key" val)
> } $ unset "a[$key]"
> } unset: a[hello * [ world]: invalid parameter name
> 
> Hmm, when examining ${a[$key]} we enter parse_subscript() with $key
> tokenized, but there's no way to get the tokenized string to reach
> the same point in the unset builtin (it's either already expanded,
> or not tokenized).  Also ${a[$key]} passes sub=0 to parse_subscript()
> whereas "unset" always passes sub=1, but I'm uncertain how that may
> be relevant.

I gave up on this ages ago, frankly.  We certainly a need way of raw key
reference, but I don't think we've got one, and each time we tweak it we
seem to make it even more tortuous.  Possibly we need yet another flag
to say "FOR ****'S SAKE DON'T TRY TO BE CLEVER NO I MEAN IT ****" (but
without the Tarantino script edit).

First hunk is completely gratuitous but I noticed it when debugging.

pws


diff --git a/Src/hist.c b/Src/hist.c
index da5a8b2..177250f 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -1083,7 +1083,6 @@ hbegin(int dohist)
     } else
 	histactive = HA_ACTIVE | HA_NOINC;
 
-    hf = getsparam("HISTFILE");
     /*
      * For INCAPPENDHISTORYTIME, when interactive, save the history here
      * as it gives a better estimate of the times of commands.
@@ -1104,8 +1103,10 @@ hbegin(int dohist)
      */
     if (isset(INCAPPENDHISTORYTIME) && !isset(SHAREHISTORY) &&
 	!isset(INCAPPENDHISTORY) &&
-	!(histactive & HA_NOINC) && !strin && histsave_stack_pos == 0)
+	!(histactive & HA_NOINC) && !strin && histsave_stack_pos == 0) {
+	hf = getsparam("HISTFILE");
 	savehistfile(hf, 0, HFILE_USE_OPTIONS | HFILE_FAST);
+    }
 }
 
 /**/
diff --git a/Src/params.c b/Src/params.c
index d71dfde..8683777 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -1204,7 +1204,7 @@ isident(char *s)
 /**/
 static zlong
 getarg(char **str, int *inv, Value v, int a2, zlong *w,
-       int *prevcharlen, int *nextcharlen)
+       int *prevcharlen, int *nextcharlen, int flags)
 {
     int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash;
     int keymatch = 0, needtok = 0, arglen, len, inpar = 0;
@@ -1407,6 +1407,8 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
 	if (ishash) {
 	    HashTable ht = v->pm->gsu.h->getfn(v->pm);
 	    if (!ht) {
+		if (flags & SCANPM_CHECKING)
+		    return isset(KSHARRAYS) ? 1 : 0;
 		ht = newparamtable(17, v->pm->node.nam);
 		v->pm->gsu.h->setfn(v->pm, ht);
 	    }
@@ -1848,7 +1850,8 @@ getindex(char **pptr, Value v, int flags)
 	zlong we = 0, dummy;
 	int startprevlen, startnextlen;
 
-	start = getarg(&s, &inv, v, 0, &we, &startprevlen, &startnextlen);
+	start = getarg(&s, &inv, v, 0, &we, &startprevlen, &startnextlen,
+		       flags);
 
 	if (inv) {
 	    if (!v->isarr && start != 0) {
@@ -1922,7 +1925,7 @@ getindex(char **pptr, Value v, int flags)
 
 	    if ((com = (*s == ','))) {
 		s++;
-		end = getarg(&s, &inv, v, 1, &dummy, NULL, NULL);
+		end = getarg(&s, &inv, v, 1, &dummy, NULL, NULL, flags);
 	    } else {
 		end = we ? we : start;
 	    }
diff --git a/Src/subst.c b/Src/subst.c
index 5df2a8b..d1827c2 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -2389,7 +2389,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
      */
     if (!subexp || aspar) {
 	char *ov = val;
-
+	int scanflags = hkeys | hvals;
+	if (arrasg)
+	    scanflags |= SCANPM_ASSIGNING;
+	if (qt)
+	    scanflags |= SCANPM_DQUOTED;
+	if (chkset)
+	    scanflags |= SCANPM_CHECKING;
 	/*
 	 * Second argument: decide whether to use the subexpression or
 	 *   the string next on the line as the parameter name.
@@ -2418,9 +2424,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 	if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
 			     (wantt ? -1 :
 			      ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
-			     hkeys|hvals|
-			     (arrasg ? SCANPM_ASSIGNING : 0)|
-			     (qt ? SCANPM_DQUOTED : 0))) ||
+			     scanflags)) ||
 	    (v->pm && (v->pm->node.flags & PM_UNSET)) ||
 	    (v->flags & VALFLAG_EMPTY))
 	    vunset = 1;
diff --git a/Src/zsh.h b/Src/zsh.h
index 27642f2..76412b8 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -1906,6 +1906,7 @@ struct tieddata {
 				  * necessarily want to match multiple
 				  * elements
 				  */
+#define SCANPM_CHECKING   (1<<10) /* Check if set, no need to create */
 /* "$foo[@]"-style substitution
  * Only sign bit is significant
  */



Messages sorted by: Reverse Date, Date, Thread, Author