Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: ordering of hash table scans



This started off as an attempt to make ztrcmp() handle multibyte
characters when MULTIBYTE is turned on, which I've done.  ztrcmp() is
only used when sorting the names of hash nodes for use when scanning
through hash tables, usually in order to print them out:  this is used
for commands of various sort, parameters, and a few other miscellaneous
bist and pieces.

I have a vague memory that it's deliberate that we don't use strcoll()
here, which would make the sorting locale dependent and in particular
possibly case-insensitive.  However, it's possible this was never openly
discussed in this particular case and I'm thinking of something else.
(Remember that here we're sorting on the names of hash nodes, some of
which don't allow the full character range if handled portably, but
that's not true of all forms.)

I then noticed that it's actually thoroughly inconsistent which objects
are sorted before printing out and which are printed out any old how as
they emerge from the hash table.  In particular there was no facility at
all for sorting when applying a pattern match to the output (typically
with the "-m" option to builtins).

I've tidied this up so that in any place where the result is being
directly printed sorting is applied.  Maybe someone can think of cases
where this isn't appropriate, but given the current half-baked state
it's hard to see.

Index: Src/builtin.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/builtin.c,v
retrieving revision 1.174
diff -u -r1.174 builtin.c
--- Src/builtin.c	21 Jan 2007 22:47:39 -0000	1.174
+++ Src/builtin.c	6 Feb 2007 21:39:45 -0000
@@ -493,7 +493,7 @@
 	    tokenize(*argv);
 	    if ((pprog = patcompile(*argv, PAT_STATIC, 0))) {
 		queue_signals();
-		match += scanmatchtable(ht, pprog, 0, 0, scanfunc, 0);
+		match += scanmatchtable(ht, pprog, 0, 0, 0, scanfunc, 0);
 		unqueue_signals();
 	    }
 	    else {
@@ -2394,7 +2394,7 @@
 		continue;
 	    }
 	    if (OPT_PLUS(ops,'m') && !asg->value) {
-		scanmatchtable(paramtab, pprog, on|roff, 0,
+		scanmatchtable(paramtab, pprog, 1, on|roff, 0,
 			       paramtab->printnode, printflags);
 		continue;
 	    }
@@ -2720,7 +2720,7 @@
 		/* with no options, just print all functions matching the glob pattern */
 		queue_signals();
 		if (!(on|off)) {
-		    scanmatchtable(shfunctab, pprog, 0, DISABLED,
+		    scanmatchtable(shfunctab, pprog, 1, 0, DISABLED,
 				   shfunctab->printnode, pflags);
 		} else {
 		    /* apply the options to all functions matching the glob pattern */
@@ -2987,25 +2987,25 @@
 		 * We're not using it, so search for ... */
 
 		/* aliases ... */
-		scanmatchtable(aliastab, pprog, 0, DISABLED,
+		scanmatchtable(aliastab, pprog, 1, 0, DISABLED,
 			       aliastab->printnode, printflags);
 
 		/* and reserved words ... */
-		scanmatchtable(reswdtab, pprog, 0, DISABLED,
+		scanmatchtable(reswdtab, pprog, 1, 0, DISABLED,
 			       reswdtab->printnode, printflags);
 
 		/* and shell functions... */
-		scanmatchtable(shfunctab, pprog, 0, DISABLED,
+		scanmatchtable(shfunctab, pprog, 1, 0, DISABLED,
 			       shfunctab->printnode, printflags);
 
 		/* and builtins. */
-		scanmatchtable(builtintab, pprog, 0, DISABLED,
+		scanmatchtable(builtintab, pprog, 1, 0, DISABLED,
 			       builtintab->printnode, printflags);
 	    }
 	    /* Done search for `internal' commands, if the -p option *
 	     * was not used.  Now search the path.                   */
 	    cmdnamtab->filltable(cmdnamtab);
-	    scanmatchtable(cmdnamtab, pprog, 0, 0,
+	    scanmatchtable(cmdnamtab, pprog, 1, 0, 0,
 			   cmdnamtab->printnode, printflags);
 
 	    unqueue_signals();
@@ -3193,7 +3193,7 @@
 	    tokenize(*argv);  /* expand */
 	    if ((pprog = patcompile(*argv, PAT_STATIC, NULL))) {
 		/* display matching hash table elements */
-		scanmatchtable(ht, pprog, 0, 0, ht->printnode, printflags);
+		scanmatchtable(ht, pprog, 1, 0, 0, ht->printnode, printflags);
 	    } else {
 		untokenize(*argv);
 		zwarnnam(name, "bad pattern : %s", *argv);
@@ -3378,7 +3378,7 @@
 	    if ((pprog = patcompile(*argv, PAT_STATIC, NULL))) {
 		/* display the matching aliases */
 		queue_signals();
-		scanmatchtable(ht, pprog, flags1, flags2,
+		scanmatchtable(ht, pprog, 1, flags1, flags2,
 			       ht->printnode, printflags);
 		unqueue_signals();
 	    } else {
Index: Src/hashtable.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/hashtable.c,v
retrieving revision 1.24
diff -u -r1.24 hashtable.c
--- Src/hashtable.c	7 Mar 2006 21:30:36 -0000	1.24
+++ Src/hashtable.c	6 Feb 2007 21:39:46 -0000
@@ -355,23 +355,42 @@
  * the scanfunc.  Replaced elements will appear in the scan exactly once,
  * the new version if it was not scanned before the replacement was made.
  * Added elements might or might not appear in the scan.
+ *
+ * pprog, if non-NULL, is a pattern that must match the name
+ * of the node.
+ *
+ * The function returns the number of matches, as reduced by pprog, flags1
+ * and flags2.
  */
 
 /**/
-mod_export void
-scanhashtable(HashTable ht, int sorted, int flags1, int flags2, ScanFunc scanfunc, int scanflags)
+mod_export int
+scanmatchtable(HashTable ht, Patprog pprog, int sorted,
+	       int flags1, int flags2, ScanFunc scanfunc, int scanflags)
 {
+    int match = 0;
     struct scanstatus st;
 
-    if (ht->scantab) {
+    /*
+     * scantab is currently only used by modules to scan
+     * tables where the contents are generated on the fly from
+     * other objects.  Note the fact that in this case pprog,
+     * sorted, flags1 and flags2 are ignore.
+     */
+    if (!pprog && ht->scantab) {
 	ht->scantab(ht, scanfunc, scanflags);
-	return;
+	return ht->ct;
     }
     if (sorted) {
 	int i, ct = ht->ct;
 	VARARR(HashNode, hnsorttab, ct);
 	HashNode *htp, hn;
 
+	/*
+	 * Because the structure might change under our feet,
+	 * we can't apply the flags and the pattern before sorting,
+	 * tempting though that is.
+	 */
 	for (htp = hnsorttab, i = 0; i < ht->hsize; i++)
 	    for (hn = ht->nodes[i]; hn; hn = hn->next)
 		*htp++ = hn;
@@ -382,10 +401,14 @@
 	st.u.s.ct = ct;
 	ht->scan = &st;
 
-	for (htp = hnsorttab, i = 0; i < ct; i++, htp++)
-	    if (*htp && ((*htp)->flags & flags1) + !flags1 &&
-		!((*htp)->flags & flags2))
+	for (htp = hnsorttab, i = 0; i < ct; i++, htp++) {
+	    if ((!flags1 || ((*htp)->flags & flags1)) &&
+		!((*htp)->flags & flags2) &&
+		(!pprog || pattry(pprog, (*htp)->nam))) {
+		match++;
 		scanfunc(*htp, scanflags);
+	    }
+	}
 
 	ht->scan = NULL;
     } else {
@@ -399,49 +422,27 @@
 	    for (st.u.u = nodes[i]; st.u.u; ) {
 		HashNode hn = st.u.u;
 		st.u.u = st.u.u->next;
-		if ((hn->flags & flags1) + !flags1 && !(hn->flags & flags2))
+		if ((!flags1 || (hn->flags & flags1)) && !(hn->flags & flags2)
+		    && (!pprog || pattry(pprog, hn->nam))) {
+		    match++;
 		    scanfunc(hn, scanflags);
+		}
 	    }
 
 	ht->scan = NULL;
     }
+
+    return match;
 }
 
-/* Scan all nodes in a hash table and executes scanfunc on the *
- * nodes which meet all the following criteria:                *
- * The hash key must match the glob pattern given by `com'.    *
- * If (flags1 > 0), then any flag in flags1 must be set.       *
- * If (flags2 > 0), then all flags in flags2 must NOT be set.  *
- *                                                             *
- * scanflags is passed unchanged to scanfunc (if executed).    *
- * The return value is the number of matches.                  */
 
 /**/
-int
-scanmatchtable(HashTable ht, Patprog pprog, int flags1, int flags2, ScanFunc scanfunc, int scanflags)
+mod_export int
+scanhashtable(HashTable ht, int sorted, int flags1, int flags2,
+	      ScanFunc scanfunc, int scanflags)
 {
-    int i, hsize = ht->hsize;
-    HashNode *nodes = ht->nodes;
-    int match = 0;
-    struct scanstatus st;
-
-    st.sorted = 0;
-    ht->scan = &st;
-
-    for (i = 0; i < hsize; i++)
-	for (st.u.u = nodes[i]; st.u.u; ) {
-	    HashNode hn = st.u.u;
-	    st.u.u = st.u.u->next;
-	    if ((hn->flags & flags1) + !flags1 && !(hn->flags & flags2) &&
-		pattry(pprog, hn->nam)) {
-		scanfunc(hn, scanflags);
-		match++;
-	    }
-	}
-
-    ht->scan = NULL;
-
-    return match;
+    return scanmatchtable(ht, NULL, sorted, flags1, flags2,
+			  scanfunc, scanflags);
 }
 
 /* Expand hash tables when they get too many entries. *
Index: Src/module.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/module.c,v
retrieving revision 1.23
diff -u -r1.23 module.c
--- Src/module.c	10 Jul 2006 13:08:23 -0000	1.23
+++ Src/module.c	6 Feb 2007 21:39:47 -0000
@@ -1265,7 +1265,7 @@
 	return ret;
     } else if(!*args) {
 	/* list autoloaded builtins */
-	scanhashtable(builtintab, 0, 0, 0,
+	scanhashtable(builtintab, 1, 0, 0,
 	    autoloadscan, OPT_ISSET(ops,'L') ? PRINT_LIST : 0);
 	return 0;
     } else {
Index: Src/options.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/options.c,v
retrieving revision 1.33
diff -u -r1.33 options.c
--- Src/options.c	9 Jan 2007 21:59:31 -0000	1.33
+++ Src/options.c	6 Feb 2007 21:39:47 -0000
@@ -578,7 +578,8 @@
 		continue;
 	    }
 	    /* Loop over expansions. */
-	    scanmatchtable(optiontab, pprog, 0, OPT_ALIAS, setoption, !isun);
+	    scanmatchtable(optiontab, pprog, 0, 0, OPT_ALIAS,
+			   setoption, !isun);
 	    args++;
 	}
     }
Index: Src/params.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/params.c,v
retrieving revision 1.121
diff -u -r1.121 params.c
--- Src/params.c	2 Nov 2006 18:43:19 -0000	1.121
+++ Src/params.c	6 Feb 2007 21:39:48 -0000
@@ -4266,7 +4266,7 @@
 	{
             HashTable ht = p->gsu.h->getfn(p);
             if (ht)
-		scanhashtable(ht, 0, 0, PM_UNSET,
+		scanhashtable(ht, 1, 0, PM_UNSET,
 			      ht->printnode, PRINT_KV_PAIR);
 	}
 	if (!(printflags & PRINT_KV_PAIR))
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.152
diff -u -r1.152 utils.c
--- Src/utils.c	27 Jan 2007 23:52:13 -0000	1.152
+++ Src/utils.c	6 Feb 2007 21:39:49 -0000
@@ -3700,27 +3700,56 @@
 int
 ztrcmp(char const *s1, char const *s2)
 {
-    int c1, c2;
+    convchar_t c1 = 0, c2;
 
-    while (*s1 && *s1 == *s2) {
-	s1++;
-	s2++;
-    }
-
-    if (!(c1 = STOUC(*s1)))
-	c1 = -1;
-    else if (c1 == STOUC(Meta))
-	c1 = STOUC(*++s1) ^ 32;
-    if (!(c2 = STOUC(*s2)))
-	c2 = -1;
-    else if (c2 == STOUC(Meta))
-	c2 = STOUC(*++s2) ^ 32;
+#ifdef MULTIBYTE_SUPPORT
+    if (isset(MULTIBYTE)) {
+	mb_metacharinit();
+	while (*s1) {
+	    int clen = mb_metacharlenconv(s1, &c1);
+
+	    if (strncmp(s1, s2, clen))
+		break;
+	    s1 += clen;
+	    s2 += clen;
+	}
+    } else
+#endif
+	while (*s1 && *s1 == *s2) {
+	    s1++;
+	    s2++;
+	}
+
+    if (!*s1) {
+	if (!*s2)
+	    return 0;
+	return -1;
+    }
+    if (!*s2)
+	return 1;
+#ifdef MULTIBYTE_SUPPORT
+    if (isset(MULTIBYTE)) {
+	/* TODO: shift state for s2 might be wrong? */
+	mb_metacharinit();
+	(void)mb_metacharlenconv(s2, &c2);
+	if (c1 == WEOF)
+	    c1 = STOUC(*s1 == Meta ? s1[1] ^ 32 : *s1);
+	if (c2 == WEOF)
+	    c2 = STOUC(*s2 == Meta ? s2[1] ^ 32 : *s2);
+    }
+    else
+#endif
+    {
+	c1 = STOUC(*s1 == Meta ? s1[1] ^ 32 : *s1);
+	c2 = STOUC(*s2 == Meta ? s2[1] ^ 32 : *s2);
+    }
 
-    if (c1 == c2)
-	return 0;
     if (c1 < c2)
 	return -1;
-    return 1;
+    else if (c1 == c2)
+	return 0;
+    else
+	return 1;
 }
 
 /* Return the unmetafied length of a metafied string. */

-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/



Messages sorted by: Reverse Date, Date, Thread, Author