Zsh Mailing List Archive Messages sorted by: Reverse Date, Date, Thread, Author

zsh_error_db --- hash-based database of error messages

X-seq: zsh-workers 51224
From: Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
To: zsh workers <zsh-workers@xxxxxxx>
Subject: zsh_error_db --- hash-based database of error messages
Date: Fri, 16 Dec 2022 16:42:53 +0000 (GMT)
Archived-at: <https://zsh.org/workers/51224>
Importance: Normal
List-id: <zsh-workers.zsh.org>

Following on from the retread of the discussion on error messages,
here's a very simple proof of concept for a hash-based database of
error messages.  Even if it's adopted I don't intend the C code
to get much larger as the point is to allow it to be able to do
everything in shell code.

Apart from more user-friendly messages which someone with no
access to the C code can dedicate their life to tweaking,
this is obviously a boon for internationalization (which
I approve of so much I've even spelled it with a 'z').

Trivial example:

% typeset -A zsh_error_db=(E3 "not very many matches found: %s")
% setopt nomatch                                                
% echo fudge*
zsh: not very many matches found: fudge*

Apart from updating the error message to examine the hash, the
only change is to prefix the error message format string with
the code such as "E3" and a colon.  This would obviously
be done universally.

The E, a set of digits, and a colon is tested for.  I originally
didn't have the E in front, but see my other comment on hash lookups
below.  There is obviously room for doing this differently, and
some way of making it easy to avoid duplicates would probably be
good, but I think it needs to be reasonably simple.

I didn't look exhaustively, but it seems we don't have a potted
function that takes the name of a hash and an entry and looks
it up as a string, though the code for this isn't very verbose
anyway.  We don't actually force the entry of the hash to
be a string type, but in practice I think it always is.
The "E" prefix results because at one stage numbers were
doing positional parameter lookups; I've since then gone
into the hash handling at lower level so that doesn't happen
any more, but I left the E as giving a slightly more
extensible interface, which I expect everyone else is going
to tear apart anyway.

The signatures of the format strings, i.e. where the % escapes
live, are checked, though if there are cases where
we have more than a single letter code it'll fall over.
There's no provision for reordering escapes, either, but I
wouldn't expect to do that in an initial implementation anyway.

pws

diff --git a/Src/exec.c b/Src/exec.c
index 2b7e0c7c5..5ad56eced 100644
--- a/Src/exec.c
+++ b/Src/exec.c
@@ -3333,7 +3333,7 @@ execcmd_exec(Estate state, Execcmd_params eparams,
 		     * has provided a status.
 		     */
 		    if (badcshglob == 1) {
-			zerr("no match");
+			zerr("E2:no match");
 			lastval = 1;
 			if (forked)
 			    _realexit();
diff --git a/Src/glob.c b/Src/glob.c
index 490bafc37..493c4227a 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -1868,7 +1868,7 @@ zglob(LinkList list, LinkNode np, int nountok)
 	if (isset(CSHNULLGLOB)) {
 	    badcshglob |= 1;	/* at least one cmd. line expansion failed */
 	} else if (isset(NOMATCH)) {
-	    zerr("no matches found: %s", ostr);
+	    zerr("E3:no matches found: %s", ostr);
 	    zfree(matchbuf, 0);
 	    restore_globstate(saved);
 	    return;
diff --git a/Src/subst.c b/Src/subst.c
index 0f98e6ea3..571673d11 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -504,7 +504,7 @@ globlist(LinkList list, int flags)
     if (noerrs)
 	badcshglob = 0;
     else if (badcshglob == 1)
-	zerr("no match");
+	zerr("E1:no match");
 }
 
 /* perform substitution on a single word */
diff --git a/Src/utils.c b/Src/utils.c
index edf5d3df7..e8070df1e 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -119,6 +119,93 @@ set_widearray(char *mb_array, Widechar_array wca)
 }
 #endif
 
+/* Warn on stderr of a bad zsh_error_db entry; returns internal unchanged */
+/**/
+static const char *
+zerrmsg_bad_signature(const char *code, const char *internal)
+{
+    fprintf(stderr, "zsh_error_db entry `%s' has incorrect signature for:\n%s\n",
+	code, internal);	/* don't use the error message system here! */
+    return internal;
+}
+
+/* Swap the code-prefixed message msg ("E", digits, ":", text) for its
+ * zsh_error_db entry if there is one with the same %-escapes; otherwise
+ * return the built-in text after the colon, or msg if it has no code. */
+/**/
+static const char *
+zerrmsg_from_hash(const char *msg)
+{
+    Param errdb, msgpm;
+    HashTable errtab;
+    const char *postcode = msg, *sigmsg, *sigvar, *imsg;
+    char *errcode, *newmsg;
+
+    if (*postcode++ != 'E')
+	return msg;
+    while (idigit(*postcode))
+	++postcode;
+    /* postcode is already past the E: insist on at least one digit */
+    if (postcode == msg + 1 || *postcode != ':')
+	return msg;
+
+    imsg = postcode+1;
+    errdb = (Param)paramtab->getnode(paramtab, "zsh_error_db");
+    if (!errdb || !(errdb->node.flags & PM_HASHED))
+	return imsg;
+
+    errcode = dupstrpfx(msg, postcode-msg);
+    errtab = errdb->gsu.h->getfn(errdb);
+    if (!errtab)
+	return imsg;
+    msgpm = (Param)errtab->getnode(errtab, errcode);
+    /* No entry for this code, or not a plain string: use the built-in */
+    if (!msgpm || PM_TYPE(msgpm->node.flags))
+	return imsg;
+    newmsg = msgpm->gsu.s->getfn(msgpm);
+
+    if (!newmsg || !*newmsg)
+	return imsg;
+
+    /* Check the %-signature matches */
+    sigmsg = imsg;
+    sigvar = newmsg;
+
+    for (;;) {
+	/* Find the next escape in the built-in text, skipping "%%" */
+	while (*sigmsg && *sigmsg != '%')
+	    sigmsg++;
+	if (!*sigmsg)
+	    break;
+	++sigmsg;
+	if (*sigmsg == '%') {
+	    ++sigmsg;
+	    continue;
+	}
+	while (*sigvar) {
+	    if (*sigvar++ == '%') {
+		if (*sigvar != '%')
+		    break;
+		++sigvar;
+	    }
+	}
+	if (!*sigvar || *sigvar != *sigmsg)
+	    return zerrmsg_bad_signature(errcode, imsg);
+	++sigvar;
+	++sigmsg;
+    }
+    /* Any escape left over in the entry has no argument to consume */
+    while (*sigvar) {
+	if (*sigvar++ == '%') {
+	    if (*sigvar != '%')
+		return zerrmsg_bad_signature(errcode, imsg);
+	    ++sigvar;
+	}
+    }
+
+    return newmsg;
+}
+
 
 /* Print an error
 
@@ -305,6 +392,8 @@ zerrmsg(FILE *file, const char *fmt, va_list ap)
     } else
 	fputc((unsigned char)' ', file);
 
+    fmt = zerrmsg_from_hash(fmt);
+
     while (*fmt)
 	if (*fmt == '%') {
 	    fmt++;

Follow-Ups:
- Re: zsh_error_db --- hash-based database of error messages
  - From: Oliver Kiddle
- Re: zsh_error_db --- hash-based database of error messages
  - From: Daniel Shahaf

Messages sorted by: Reverse Date, Date, Thread, Author