Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH 2/2: [WIP] Efficient dedup for unsorted completions



This implements my idea of sorting a temporary array and then using
that for deduplication. This is fast enough that -2 isn't needed in
_path_files, and it will also help in potential cases other than
file completion.

PS
I realized a bit late that Cmatch is a typedef for a pointer, so the
double-pointer shenanigans are a bit pointless, but I'll leave reworking
that until another evening...

---
 Src/Zle/comp.h     |  1 +
 Src/Zle/compcore.c | 66 +++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/Src/Zle/comp.h b/Src/Zle/comp.h
index a8480c2bac..2ca779fe53 100644
--- a/Src/Zle/comp.h
+++ b/Src/Zle/comp.h
@@ -140,6 +140,7 @@ struct cmatch {
 #define CMF_ALL      (1<<13)	/* a match representing all other matches */
 #define CMF_DUMMY    (1<<14)	/* unselectable dummy match */
 #define CMF_MORDER   (1<<15)    /* order by matches, not display strings */
+#define CMF_DELETE   (1<<16)    /* used for deduplication of unsorted matches, don't set */
 
 /* Stuff for completion matcher control. */
 
diff --git a/Src/Zle/compcore.c b/Src/Zle/compcore.c
index a9ace5587b..126cdd3ae9 100644
--- a/Src/Zle/compcore.c
+++ b/Src/Zle/compcore.c
@@ -3191,6 +3191,13 @@ matchcmp(Cmatch *a, Cmatch *b)
 	    matchorder & CGF_NUMSORT) ? SORTIT_NUMERICALLY : 0));
 }
 
+/**/
+static int
+matchcmp_pointer(Cmatch **a, Cmatch **b)
+{
+    return matchcmp(*a, *b);
+}
+
 /* This tests whether two matches are equal (would produce the same
  * strings on the command line). */
 
@@ -3284,30 +3291,47 @@ makearray(LinkList l, int type, int flags, int *np, int *nlp, int *llp)
 	    }
 	/* used -O nosort or -V, don't sort */
 	} else {
-	    /* didn't use -1 or -2, so remove all duplicates (inefficient) */
+	    /* didn't use -1 or -2, so remove all duplicates (efficient) */
 	    if (!(flags & CGF_UNIQALL) && !(flags & CGF_UNIQCON)) {
-                int dup;
-
-		for (ap = rp; *ap; ap++) {
-		    for (bp = cp = ap + 1; *bp; bp++) {
-			if (!matcheq(*ap, *bp))
-			    *cp++ = *bp;
-			else
-			    n--;
+                int dup, i;
+
+		/* To avoid O(n^2) here, sort a temporary list of pointers to the real array */
+		/* TODO: this can probably just be a copy of the array, i forgot Cmatch is typedef to pointer */
+		matchorder = flags;
+		Cmatch **sp, **asp;
+		sp = (Cmatch **) zhalloc((n + 1) * sizeof(Cmatch *));
+		asp = sp;
+		for (i = 0; i < n; i++)
+		    *asp++ = rp + i;
+		*asp = NULL;
+		qsort((void *) sp, n, sizeof(Cmatch *),
+		      (int (*) _((const void *, const void *)))matchcmp_pointer);
+		for (asp = sp + 1; *asp; asp++) {
+		    Cmatch *ap = asp[-1], *bp = asp[0];
+		    if (matcheq(*ap, *bp)) {
+			bp[0]->flags = CMF_DELETE;
+		    } else if (!ap[0]->disp) {
+			/* Mark those, that would show the same string in the list. */
+			/* Mikael: I haven't tested this other than commenting out matcheq above */
+			Cmatch **bsp = sp;
+			for (dup = 0; bp[0] && !(bp[0])->disp &&
+				 !strcmp((*ap)->str, (bp[0])->str); bp = *++sp) {
+			    (bp[0])->flags |= CMF_MULT;
+			    dup = 1;
+			}
+			if (dup)
+			    (*ap)->flags |= CMF_FMULT;
 		    }
-		    *cp = NULL;
-                    if (!(*ap)->disp) {
-                        for (dup = 0, bp = ap + 1; *bp; bp++)
-                            if (!(*bp)->disp &&
-                                !((*bp)->flags & CMF_MULT) &&
-                                !strcmp((*ap)->str, (*bp)->str)) {
-                                (*bp)->flags |= CMF_MULT;
-                                dup = 1;
-                            }
-                        if (dup)
-                            (*ap)->flags |= CMF_FMULT;
-                    }
 		}
+		int n_orig = n;
+		for (bp = rp, ap = rp; bp < rp + n_orig; ap++, bp++) {
+		    while (bp[0]->flags & CMF_DELETE) {
+			bp++;
+			n--;
+		    }
+		    *ap = *bp;
+		}
+		*ap = NULL;
 	    /* passed -1 but not -2, so remove consecutive duplicates (efficient) */
 	    } else if (!(flags & CGF_UNIQCON)) {
 		int dup;
-- 
2.15.1





Messages sorted by: Reverse Date, Date, Thread, Author