Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: PATCH: Apply spell correction to autocd



On Feb 28, 10:44am, Peter Stephenson wrote:
}
} I don't think the internal spellchecking stuff has ever had a major
} overhaul (as distinct from having extra bits grafted on).  It's not
} surprising if it's weird.  I expect tidying it up would be a good idea.

OK, here's a stab at it.  See embedded comments (gasp).  Apply this instead
of the previous (20882) patch, not on top of it.

Index: Src/utils.c
===================================================================
RCS file: /extra/cvsroot/zsh/zsh-4.0/Src/utils.c,v
retrieving revision 1.21
diff -c -r1.21 utils.c
--- Src/utils.c	18 Feb 2005 17:05:17 -0000	1.21
+++ Src/utils.c	28 Feb 2005 18:06:44 -0000
@@ -1647,11 +1659,12 @@
 mod_export void
 spckword(char **s, int hist, int cmd, int ask)
 {
-    char *t, *u;
+    char *t;
     int x;
     char ic = '\0';
     int ne;
     int preflen = 0;
+    int autocd = cmd && isset(AUTOCD) && strcmp(*s, ".") && strcmp(*s, "..");
 
     if ((histdone & HISTFLAG_NOEXEC) || **s == '-' || **s == '%')
 	return;
@@ -1715,8 +1728,7 @@
 	}
 	if (access(unmeta(guess), F_OK) == 0)
 	    return;
-	if ((u = spname(guess)) != guess)
-	    best = u;
+	best = spname(guess);
 	if (!*t && cmd) {
 	    if (hashcmd(guess, pathchecked))
 		return;
@@ -1726,12 +1738,28 @@
 	    scanhashtable(shfunctab, 1, 0, 0, spscan, 0);
 	    scanhashtable(builtintab, 1, 0, 0, spscan, 0);
 	    scanhashtable(cmdnamtab, 1, 0, 0, spscan, 0);
+	    if (autocd) {
+		char **pp;
+		for (pp = cdpath; *pp; pp++) {
+		    char bestcd[PATH_MAX + 1];
+		    int thisdist;
+		    /* Less than d here, instead of less than or equal  *
+		     * as used in spscan(), so that an autocd is chosen *
+		     * only when it is better than anything so far, and *
+		     * so we prefer directories earlier in the cdpath.  */
+		    if ((thisdist = mindist(*pp, *s, bestcd)) < d) {
+			best = dupstring(bestcd);
+			d = thisdist;
+		    }
+		}
+	    }
 	}
     }
     if (errflag)
 	return;
     if (best && (int)strlen(best) > 1 && strcmp(best, guess)) {
 	if (ic) {
+	    char *u;
 	    if (preflen) {
 		/* do not correct the result of an expansion */
 		if (strncmp(guess, best, preflen))
@@ -2421,10 +2449,14 @@
 {
     char *p, spnameguess[PATH_MAX + 1], spnamebest[PATH_MAX + 1];
     static char newname[PATH_MAX + 1];
-    char *new = newname, *old;
-    int bestdist = 200, thisdist;
+    char *new = newname, *old = oldname;
+    int bestdist = 0, thisdist, thresh, maxthresh = 0;
 
-    old = oldname;
+    /* This loop corrects each directory component of the path, stopping *
+     * when any correction distance would exceed the distance threshold. *
+     * NULL is returned only if the first component cannot be corrected; *
+     * otherwise a copy of oldname with a corrected prefix is returned.  *
+     * Rationale for this, if there ever was any, has been forgotten.    */
     for (;;) {
 	while (*old == '/')
 	    *new++ = *old++;
@@ -2436,15 +2468,29 @@
 	    if (p < spnameguess + PATH_MAX)
 		*p++ = *old;
 	*p = '\0';
-	if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= 3) {
-	    if (bestdist < 3) {
+	/* Every component is allowed a single distance 2 correction or two *
+	 * distance 1 corrections.  Longer ones get additional corrections. */
+	thresh = (int)(p - spnameguess) / 4 + 1;
+	if (thresh < 3)
+	    thresh = 3;
+	if ((thisdist = mindist(newname, spnameguess, spnamebest)) >= thresh) {
+	    /* The next test is always true, except for the first path    *
+	     * component.  We could initialize bestdist to some large     *
+	     * constant instead, and then compare to that constant here,  *
+	     * because an invariant is that we've never exceeded the      *
+	     * threshold for any component so far; but I think that looks *
+	     * odd to the human reader, and we may make use of the total  *
+	     * distance for all corrections at some point in the future.  */
+	    if (bestdist < maxthresh) {
 		strcpy(new, spnameguess);
 		strcat(new, old);
 		return newname;
 	    } else
 	    	return NULL;
-	} else
-	    bestdist = thisdist;
+	} else {
+	    maxthresh = bestdist + thresh;
+	    bestdist += thisdist;
+	}
 	for (p = spnamebest; (*new = *p++);)
 	    new++;
     }
@@ -2487,6 +2533,7 @@
 static int
 spdist(char *s, char *t, int thresh)
 {
+    /* TODO: Correction for non-ASCII and multibyte-input keyboards. */
     char *p, *q;
     const char qwertykeymap[] =
     "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
@@ -2520,7 +2567,7 @@
 
     if (!strcmp(s, t))
 	return 0;
-/* any number of upper/lower mistakes allowed (dist = 1) */
+    /* any number of upper/lower mistakes allowed (dist = 1) */
     for (p = s, q = t; *p && tulower(*p) == tulower(*q); p++, q++);
     if (!*p && !*q)
 	return 1;
@@ -2544,7 +2591,7 @@
 	    int t0;
 	    char *z;
 
-	/* mistyped letter */
+	    /* mistyped letter */
 
 	    if (!(z = strchr(keymap, p[0])) || *z == '\n' || *z == '\t')
 		return spdist(p + 1, q + 1, thresh - 1) + 1;



Messages sorted by: Reverse Date, Date, Thread, Author