Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: Add customization and multibyte support to the correction keymap



The bottom row also gains an extra slot on the left side for
international layouts which have an extra key there.
---

Since nobody complained, I'll assume everyone is totally on board with
this idea so I went ahead and added multibyte support too.

It's obviously a little unfortunate that the logic gets duplicated, but
I'm not sure it would be any better to stuff it full of #define'd tokens.
Maybe it would be. If anyone has strong feelings about it, I could give
it a shot. Also, I don't fall back to the non-multibyte code when the
MULTIBYTE option isn't set, because that doesn't really seem useful. But
I guess that could be done too, if someone has a good reason I overlooked.

I was also a little worried about repeated conversions of all the
hashtable strings, but I haven't noticed anything being slower, fingers
crossed?

I'm also still open to comments on the parameter name.

Anyway, this finally lets me do this in my .zshrc:
CORRECT_KEYMAP="\
1234567890[]
å,.pyfgcrl/=
aoeuidhtns-;
öäqjkxbmwvz
!@#$%^&*(){}
Å<>PYFGCRL?+
AOEUIDHTNS_:
ÖÄQJKXBMWVZ"

and then
% emäcs
zsh: correct 'emäcs' to 'emacs' [nyae]? y

Truly, the future has come.

 Src/params.c |   4 +
 Src/utils.c  | 316 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 285 insertions(+), 35 deletions(-)

diff --git a/Src/params.c b/Src/params.c
index aabfc31206..e4d596a74c 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -248,6 +248,9 @@ static const struct gsu_scalar underscore_gsu =
 { underscoregetfn, nullstrsetfn, stdunsetfn };
 static const struct gsu_scalar keyboard_hack_gsu =
 { keyboardhackgetfn, keyboardhacksetfn, stdunsetfn };
+static const struct gsu_scalar correct_gsu =	/* accessors for the CORRECT_KEYMAP parameter */
+{ get_correct_keymap, set_correct_keymap, stdunsetfn };
+
 #ifdef USE_LOCALE
 static const struct gsu_scalar lc_blah_gsu =
 { strgetfn, lcsetfn, stdunsetfn };
@@ -322,6 +325,7 @@ IPDEF2("WORDCHARS", wordchars_gsu, 0),
 IPDEF2("IFS", ifs_gsu, PM_DONTIMPORT),
 IPDEF2("_", underscore_gsu, PM_DONTIMPORT),
 IPDEF2("KEYBOARD_HACK", keyboard_hack_gsu, PM_DONTIMPORT),
+IPDEF2("CORRECT_KEYMAP", correct_gsu, PM_DONTIMPORT),
 IPDEF2("0", argzero_gsu, 0),
 
 #ifdef USE_LOCALE
diff --git a/Src/utils.c b/Src/utils.c
index 13752e7569..c7076e3fe9 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -4694,44 +4694,289 @@ mindist(char *dir, char *mindistguess, char *mindistbest, int wantdir)
     return mindistd;
 }
 
+#ifdef MULTIBYTE_SUPPORT
+wchar_t qwertykeymap[] =	/* grid of 15 cells per row; '\n' rows and '\t' cells are padding, not keys */
+L"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t1234567890-=\t\
+\t\tqwertyuiop[]\t\
+\t\tasdfghjkl;'\t\t\
+\t\tzxcvbnm,./\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t!@#$%^&*()_+\t\
+\t\tQWERTYUIOP{}\t\
+\t\tASDFGHJKL:\"\t\t\
+\t\tZXCVBNM<>?\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
+wchar_t dvorakkeymap[] =	/* same grid, selected instead of qwertykeymap when DVORAK is set */
+L"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t1234567890[]\t\
+\t\t',.pyfgcrl/=\t\
+\t\taoeuidhtns-\t\t\
+\t\t;qjkxbmwvz\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t!@#$%^&*(){}\t\
+\t\t\"<>PYFGCRL?+\t\
+\t\tAOEUIDHTNS_\t\t\
+\t\t:QJKXBMWVZ\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
+wchar_t *keymap = NULL;	/* active table; NULL until first use or after CORRECT_KEYMAP is emptied */
+
+/* Getter for CORRECT_KEYMAP: the 8 key rows of the active table, newline-separated. */
+/**/
+char *
+get_correct_keymap(UNUSED(Param pm))
+{
+    const size_t pos[] = { 15 + 2, 15*2 + 2, 15*3 + 2, 15*4 + 1, 15*6 + 2, 15*7 + 2, 15*8 + 2, 15*9 + 1 };
+    char pretty_keymap[13*8 * MB_CUR_MAX + 8];
+    char *p = pretty_keymap;
+    int line, ch;
+    mbstate_t mbs;
+    if (!keymap || keymap == dvorakkeymap || keymap == qwertykeymap)
+	keymap = isset(DVORAK) ? dvorakkeymap : qwertykeymap;
+    memset(&mbs, 0, sizeof(mbs));
+    for (line = 0; line < 8; line++) {
+	for (ch = 0; ch < 12; ch++) {
+	    size_t len = wcrtomb(p, keymap[pos[line]+ch], &mbs);
+	    p += (len == (size_t)-1) ? 0 : len;	/* drop what the locale cannot encode */
+	}
+	*p++ = '\n';
+    }
+    *--p = '\0';	/* the last row's newline becomes the terminator */
+    return metafy(pretty_keymap, p - pretty_keymap, META_USEHEAP);
+}
+
+/* Setter for CORRECT_KEYMAP: 8 newline-separated rows of up to 12 characters. */
+/**/
+void
+set_correct_keymap(UNUSED(Param pm), char *x)
+{
+    const size_t pos[] = { 15 + 2, 15*2 + 2, 15*3 + 2, 15*4 + 1, 15*6 + 2, 15*7 + 2, 15*8 + 2, 15*9 + 1 };
+    wchar_t newkeymap[166], *wx, *p;
+    const char *xr = x;
+    mbstate_t mbs;
+    int line, xlen;
+    if (!x || !*x) {
+	if (keymap != qwertykeymap && keymap != dvorakkeymap)
+	    free(keymap);	/* only custom tables are heap-allocated */
+	keymap = NULL;		/* built-in tables are picked again on next use */
+	return;
+    }
+    wmemset(newkeymap, L'\t', 165);	/* blank grid, then its '\n' border rows */
+    wmemset(&newkeymap[0], L'\n', 15);
+    wmemset(&newkeymap[5*15], L'\n', 15);
+    wmemset(&newkeymap[10*15], L'\n', 15);
+    newkeymap[165] = L'\0';
+    unmetafy(x, &xlen);
+    wx = zalloc(sizeof(wchar_t) * (xlen + 1));
+    memset(&mbs, 0, sizeof(mbs));
+    /* xlen + 1: mbsrtowcs() leaves wx unterminated if it stops at the limit */
+    if (mbsrtowcs(wx, &xr, xlen + 1, &mbs) == (size_t)-1) {
+	zwarn("CORRECT_KEYMAP contains an invalid multibyte character");
+	goto done;
+    }
+    for (p = wx, line = 0; line < 8; line++) {
+	wchar_t *end = wcschr(p, L'\n');
+	if (!end) {
+	    if (line < 7) {
+		zwarn("CORRECT_KEYMAP needs to be set to 8 lines");
+		goto done;
+	    }
+	    end = wcschr(p, L'\0');
+	}
+	if (end - p > 12) {
+	    zwarn("Lines in CORRECT_KEYMAP can be at most 12 characters, line %d was longer", line + 1);
+	    goto done;
+	}
+	wmemcpy(&newkeymap[pos[line]], p, end-p);
+	p = end+1;
+    }
+    if (keymap != qwertykeymap && keymap != dvorakkeymap)
+	free(keymap);
+    keymap = wcs_ztrdup(newkeymap);
+done:
+    free(x);
+    zfree(wx, sizeof(wchar_t) * (xlen + 1));
+}
+
 /**/
 static int
 spdist(char *s, char *t, int thresh)
 {
-    /* TODO: Correction for non-ASCII and multibyte-input keyboards. */
-    char *p, *q;
-    const char qwertykeymap[] =
-    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
-\t1234567890-=\t\
-\tqwertyuiop[]\t\
-\tasdfghjkl;'\n\t\
-\tzxcvbnm,./\t\t\t\
-\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
-\t!@#$%^&*()_+\t\
-\tQWERTYUIOP{}\t\
-\tASDFGHJKL:\"\n\t\
-\tZXCVBNM<>?\n\n\t\
-\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
-    const char dvorakkeymap[] =
-    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
-\t1234567890[]\t\
-\t',.pyfgcrl/=\t\
-\taoeuidhtns-\n\t\
-\t;qjkxbmwvz\t\t\t\
-\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
-\t!@#$%^&*(){}\t\
-\t\"<>PYFGCRL?+\t\
-\tAOEUIDHTNS_\n\t\
-\t:QJKXBMWVZ\n\n\t\
-\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
-    const char *keymap;
-    if ( isset( DVORAK ) )
-      keymap = dvorakkeymap;
-    else
-      keymap = qwertykeymap;
+    if (!strcmp(s, t))
+	return 0;
+
+    /* sizes include the terminating null, which mbsrtowcs() only stores if it fits */
+    size_t slen = strlen(s) + 1, tlen = strlen(t) + 1;
+    wchar_t *ws = zalloc(sizeof(wchar_t) * slen);
+    wchar_t *wt = zalloc(sizeof(wchar_t) * tlen);
+    const char *sp = s, *tp = t;
+    mbstate_t smbs, tmbs;
+    int result = 200;	/* "too far": kept if either string is not valid multibyte text */
+    memset(&smbs, 0, sizeof(smbs));
+    memset(&tmbs, 0, sizeof(tmbs));
 
+    if (mbsrtowcs(ws, &sp, slen, &smbs) != (size_t)-1 &&
+	mbsrtowcs(wt, &tp, tlen, &tmbs) != (size_t)-1)
+	result = wcspdist(ws, wt, thresh);
+    zfree(ws, sizeof(wchar_t) * slen);
+    zfree(wt, sizeof(wchar_t) * tlen);	/* each buffer is released with its own size */
+    return result;
+}
+
+/* Typing distance between s and t: 0 if identical, 200 if too far; thresh bounds the recursion. */
+/**/
+static int wcspdist(wchar_t *s, wchar_t *t, int thresh)
+{
+    wchar_t *p, *q;
+    /* identical remainders cost nothing, as in the byte-wise spdist() */
+    if (!wcscmp(s, t))
+	return 0;
+    if (!keymap || keymap == dvorakkeymap || keymap == qwertykeymap)
+	keymap = isset(DVORAK) ? dvorakkeymap : qwertykeymap;
+
+    /* any number of upper/lower mistakes allowed (dist = 1) */
+    for (p = s, q = t; *p && towlower(*p) == towlower(*q); p++, q++);
+    if (!*p && !*q)
+	return 1;
+    if (!thresh)
+	return 200;
+    for (p = s, q = t; *p && *q; p++, q++)
+	if (*p == *q)
+	    continue;		/* don't consider "aa" transposed, ash */
+	else if (p[1] == q[0] && q[1] == p[0])	/* transpositions */
+	    return wcspdist(p + 2, q + 2, thresh - 1) + 1;
+	else if (p[1] == q[0])	/* missing letter */
+	    return wcspdist(p + 1, q + 0, thresh - 1) + 2;
+	else if (p[0] == q[1])	/* missing letter */
+	    return wcspdist(p + 0, q + 1, thresh - 1) + 2;
+	else if (*p != *q)
+	    break;
+    if ((!*p && wcslen(q) == 1) || (!*q && wcslen(p) == 1))
+	return 2;
+    for (p = s, q = t; *p && *q; p++, q++)
+	if (p[0] != q[0] && p[1] == q[1]) {
+	    int t0;
+	    wchar_t *z;
+
+	    /* mistyped letter: a character that is not a key in the keymap costs 1, */
+	    /* one of the 8 surrounding grid cells costs 2, anything else is too far */
+	    if (!(z = wcschr(keymap, p[0])) || *z == L'\n' || *z == L'\t')
+		return wcspdist(p + 1, q + 1, thresh - 1) + 1;
+	    t0 = z - keymap;
+	    if (*q == keymap[t0 - 16] || *q == keymap[t0 - 15] ||
+		*q == keymap[t0 - 14] ||
+		*q == keymap[t0 - 1] || *q == keymap[t0 + 1] ||
+		*q == keymap[t0 + 14] || *q == keymap[t0 + 15] ||
+		*q == keymap[t0 + 16])
+		return wcspdist(p + 1, q + 1, thresh - 1) + 2;
+	    return 200;
+	} else if (*p != *q)
+	    break;
+    return 200;
+}
+#else
+char qwertykeymap[] =	/* grid of 15 cells per row; '\n' rows and '\t' cells are padding, not keys */
+"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t1234567890-=\t\
+\t\tqwertyuiop[]\t\
+\t\tasdfghjkl;'\t\t\
+\t\tzxcvbnm,./\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t!@#$%^&*()_+\t\
+\t\tQWERTYUIOP{}\t\
+\t\tASDFGHJKL:\"\t\t\
+\t\tZXCVBNM<>?\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
+char dvorakkeymap[] =	/* same grid, selected instead of qwertykeymap when DVORAK is set */
+"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t1234567890[]\t\
+\t\t',.pyfgcrl/=\t\
+\t\taoeuidhtns-\t\t\
+\t\t;qjkxbmwvz\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\
+\t\t!@#$%^&*(){}\t\
+\t\t\"<>PYFGCRL?+\t\
+\t\tAOEUIDHTNS_\t\t\
+\t\t:QJKXBMWVZ\t\t\t\
+\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n";
+char *keymap = NULL;	/* active table; NULL until first use or after CORRECT_KEYMAP is emptied */
+
+/* Getter for CORRECT_KEYMAP: the 8 key rows of the active table, newline-separated. */
+/**/
+char *
+get_correct_keymap(UNUSED(Param pm))
+{
+    const size_t pos[] = { 15 + 2, 15*2 + 2, 15*3 + 2, 15*4 + 1, 15*6 + 2, 15*7 + 2, 15*8 + 2, 15*9 + 1 };
+    char pretty_keymap[13*8], *out = pretty_keymap;
+    int row;
+
+    if (!keymap || keymap == dvorakkeymap || keymap == qwertykeymap)
+	keymap = isset(DVORAK) ? dvorakkeymap : qwertykeymap;
+    for (row = 0; row < 8; row++, out += 13) {
+	memcpy(out, keymap + pos[row], 12);
+	out[12] = '\n';
+    }
+    out[-1] = '\0';	/* the last row's newline becomes the terminator */
+    return dupstring(pretty_keymap);
+}
+
+/* Setter for CORRECT_KEYMAP: 8 newline-separated rows of up to 12 characters. */
+/**/
+void
+set_correct_keymap(UNUSED(Param pm), char *x)
+{
+    const size_t pos[] = { 15 + 2, 15*2 + 2, 15*3 + 2, 15*4 + 1, 15*6 + 2, 15*7 + 2, 15*8 + 2, 15*9 + 1 };
+    char *previous = keymap, *row = x;
+    char newkeymap[166];
+    int line;
+
+    if (!x || !*x) {
+	keymap = NULL;	/* built-in tables are picked again on next use */
+    } else {
+	memset(newkeymap, '\t', 165);	/* blank grid, then its '\n' border rows */
+	memset(&newkeymap[0], '\n', 15);
+	memset(&newkeymap[5*15], '\n', 15);
+	memset(&newkeymap[10*15], '\n', 15);
+	newkeymap[165] = '\0';
+	for (line = 0; line < 8; line++) {
+	    char *end = strchr(row, '\n');
+	    if (!end && line < 7) {
+		zwarn("CORRECT_KEYMAP needs to be set to 8 lines");
+		free(x);
+		return;
+	    }
+	    if (!end)
+		end = row + strlen(row);
+	    if (end - row > 12) {
+		zwarn("Lines in CORRECT_KEYMAP can be at most 12 characters, line %d was longer", line + 1);
+		free(x);
+		return;
+	    }
+	    memcpy(newkeymap + pos[line], row, end - row);
+	    row = end + 1;
+	}
+	keymap = ztrdup(newkeymap);
+	free(x);
+    }
+    if (previous && previous != qwertykeymap && previous != dvorakkeymap)
+	free(previous);
+}
+
+/**/
+static int
+spdist(char *s, char *t, int thresh)
+{
+    char *p, *q;
     if (!strcmp(s, t))
 	return 0;
+
+    if (!keymap || keymap == dvorakkeymap || keymap == qwertykeymap) {
+	if ( isset( DVORAK ) )
+	    keymap = dvorakkeymap;
+	else
+	    keymap = qwertykeymap;
+    }
+
     /* any number of upper/lower mistakes allowed (dist = 1) */
     for (p = s, q = t; *p && tulower(*p) == tulower(*q); p++, q++);
     if (!*p && !*q)
@@ -4761,17 +5006,18 @@ spdist(char *s, char *t, int thresh)
 	    if (!(z = strchr(keymap, p[0])) || *z == '\n' || *z == '\t')
 		return spdist(p + 1, q + 1, thresh - 1) + 1;
 	    t0 = z - keymap;
-	    if (*q == keymap[t0 - 15] || *q == keymap[t0 - 14] ||
-		*q == keymap[t0 - 13] ||
+	    if (*q == keymap[t0 - 16] || *q == keymap[t0 - 15] ||
+		*q == keymap[t0 - 14] ||
 		*q == keymap[t0 - 1] || *q == keymap[t0 + 1] ||
-		*q == keymap[t0 + 13] || *q == keymap[t0 + 14] ||
-		*q == keymap[t0 + 15])
+		*q == keymap[t0 + 14] || *q == keymap[t0 + 15] ||
+		*q == keymap[t0 + 16])
 		return spdist(p + 1, q + 1, thresh - 1) + 2;
 	    return 200;
 	} else if (*p != *q)
 	    break;
     return 200;
 }
+#endif
 
 /* set cbreak mode, or the equivalent */
 
-- 
2.38.1





Messages sorted by: Reverse Date, Date, Thread, Author