Zsh Mailing List Archive Messages sorted by: Reverse Date, Date, Thread, Author
PATCH: underscore grouping in numeric output

X-seq: zsh-workers 32299
From: Peter Stephenson <p.stephenson@xxxxxxxxxxx>
To: Zsh Hackers' List <zsh-workers@xxxxxxx>
Subject: PATCH: underscore grouping in numeric output
Date: Wed, 22 Jan 2014 15:37:31 +0000
List-help: <mailto:zsh-workers-help@zsh.org>
List-id: Zsh Workers List <zsh-workers.zsh.org>
List-post: <mailto:zsh-workers@zsh.org>
Mailing-list: contact zsh-workers-help@xxxxxxx; run by ezmlm
Organization: Samsung Cambridge Solution Centre
As you may be aware, the shell now supports "_" in numeric constants on
input to allow grouping for visual clarity.  This is compatible with
a lot of other scripting languages.

It would be quite useful (in many ways more useful) to get this on
output.  There's an easy way to get this into the syntax in a relatively
natural way: extend the `[#16]' syntax that allows output with a
particular base to take an underscore and an optional integer to
indicate the group size.  This works with floating point numbers too.
See the manual entry and the new tests.

I've tweaked zcalc to understand this extended syntax.  So `[#_]' on its
own turns on grouping every three digits on output.

I'm half expecting the new floating point test to fail on some
implementation.  If it does, it would be good if you could send
me something that works.

You can finally throw away your old calculator.  (I've just removed some
*very* crufty old batteries from mine...)

diff --git a/Doc/Zsh/arith.yo b/Doc/Zsh/arith.yo
index 2674c78..4fff28f 100644
--- a/Doc/Zsh/arith.yo
+++ b/Doc/Zsh/arith.yo
@@ -76,6 +76,27 @@ have output base 16, while tt(x) (assuming it does not already exist) is
 implicitly typed by the arithmetic evaluation, where it acquires the output
 base 8.
 
+The var(base) may be replaced or followed by an underscore, which may
+itself be followed by a positive integer (if it is missing the value 3
+is used).  This indicates that underscores should be inserted into the
+output string, grouping the number for visual clarity.  The following
+integer specifies the number of digits to group together.  For example:
+
+example(setopt cbases
+print $(( [#16_4] 65536 ** 2 )))
+
+outputs `tt(0x1_0000_0000)'.
+
+The feature can be used with floating
+point numbers, in which case the base must be omitted; digits are
+grouped outwards from the decimal point in both directions.  For example,
+
+example(zmodload zsh/mathfunc
+print $(( [#_] sqrt(1e7) )))
+
+outputs `tt(3_162.277_660_168_379_5)' (the number of decimal places
+shown may vary).
+
 pindex(C_BASES, use of)
 pindex(OCTAL_ZEROES, use of)
 If the tt(C_BASES) option is set, hexadecimal numbers in the standard C
diff --git a/Functions/Misc/zcalc b/Functions/Misc/zcalc
index 1f3392d..6a56d47 100644
--- a/Functions/Misc/zcalc
+++ b/Functions/Misc/zcalc
@@ -196,7 +196,7 @@ while (( expression_mode )) ||
   # special cases
   # Set default base if `[#16]' or `[##16]' etc. on its own.
   # Unset it if `[#]' or `[##]'.
-  if [[ $line = (#b)[[:blank:]]#('[#'(\#|)(<->|)']')[[:blank:]]#(*) ]]; then
+  if [[ $line = (#b)[[:blank:]]#('[#'(\#|)((<->|)(|_|_<->))']')[[:blank:]]#(*) ]]; then
     if [[ -z $match[4] ]]; then
       if [[ -z $match[3] ]]; then
 	defbase=
diff --git a/Src/math.c b/Src/math.c
index 42355f8..2665698 100644
--- a/Src/math.c
+++ b/Src/math.c
@@ -556,6 +556,9 @@ lexconstant(void)
 int outputradix;
 
 /**/
+int outputunderscore;
+
+/**/
 static int
 zzlex(void)
 {
@@ -713,7 +716,7 @@ zzlex(void)
 	    return EOI;
 	case '[':
 	    {
-		int n;
+		int n, checkradix = 0;
 
 		if (idigit(*ptr)) {
 		    n = zstrtol(ptr, &ptr, 10);
@@ -730,9 +733,19 @@ zzlex(void)
 			n = -1;
 			ptr++;
 		    }
-		    if (!idigit(*ptr))
+		    if (!idigit(*ptr) && *ptr != '_')
 			goto bofs;
-		    outputradix = n * zstrtol(ptr, &ptr, 10);
+		    if (idigit(*ptr)) {
+			outputradix = n * zstrtol(ptr, &ptr, 10);
+			checkradix = 1;
+		    }
+		    if (*ptr == '_') {
+			ptr++;
+			if (idigit(*ptr))
+			    outputunderscore = zstrtol(ptr, &ptr, 10);
+			else
+			    outputunderscore = 3;
+		    }
 		} else {
 		    bofs:
 		    zerr("bad output format specification");
@@ -740,11 +753,13 @@ zzlex(void)
 		}
 		if(*ptr != ']')
 			goto bofs;
-		n = (outputradix < 0) ? -outputradix : outputradix;
-		if (n < 2 || n > 36) {
-		    zerr("invalid base (must be 2 to 36 inclusive): %d",
-			 outputradix);
-		    return EOI;
+		if (checkradix) {
+		    n = (outputradix < 0) ? -outputradix : outputradix;
+		    if (n < 2 || n > 36) {
+			zerr("invalid base (must be 2 to 36 inclusive): %d",
+			     outputradix);
+			return EOI;
+		    }
 		}
 		ptr++;
 		break;
@@ -1337,9 +1352,9 @@ matheval(char *s)
     char *junk;
     mnumber x;
     int xmtok = mtok;
-    /* maintain outputradix across levels of evaluation */
+    /* maintain outputradix and outputunderscore across levels of evaluation */
     if (!mlevel)
-	outputradix = 0;
+	outputradix = outputunderscore = 0;
 
     if (!*s) {
 	x.type = MN_INTEGER;
diff --git a/Src/params.c b/Src/params.c
index ad9e347..dc41c6c 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -2416,9 +2416,10 @@ setnumvalue(Value v, mnumber val)
 	if ((val.type & MN_INTEGER) || outputradix) {
 	    if (!(val.type & MN_INTEGER))
 		val.u.l = (zlong) val.u.d;
-	    convbase(p = buf, val.u.l, outputradix);
+	    p = convbase_underscore(buf, val.u.l, outputradix,
+				    outputunderscore);
 	} else
-	    p = convfloat(val.u.d, 0, 0, NULL);
+	    p = convfloat_underscore(val.u.d, outputunderscore);
 	setstrvalue(v, ztrdup(p));
 	break;
     case PM_INTEGER:
@@ -4555,9 +4556,14 @@ delenv(Param pm)
      */
 }
 
+/*
+ * Guts of convbase: this version can return the number of digits
+ * sans any base discriminator.
+ */
+
 /**/
-mod_export void
-convbase(char *s, zlong v, int base)
+void
+convbase_ptr(char *s, zlong v, int base, int *ndigits)
 {
     int digs = 0;
     zulong x;
@@ -4583,6 +4589,8 @@ convbase(char *s, zlong v, int base)
 	x /= base;
     if (!digs)
 	digs = 1;
+    if (ndigits)
+	*ndigits = digs;
     s[digs--] = '\0';
     x = v;
     while (digs >= 0) {
@@ -4594,6 +4602,64 @@ convbase(char *s, zlong v, int base)
 }
 
 /*
+ * Basic conversion of integer to a string given a base.
+ * If 0 base is 10.
+ * If negative no base discriminator is output.
+ */
+
+/**/
+mod_export void
+convbase(char *s, zlong v, int base)
+{
+    convbase_ptr(s, v, base, NULL);	/* NULL: digit count not wanted */
+}
+
+/*
+ * Add underscores to converted integer for readability with given spacing.
+ * s is as for convbase: at least BDIGBUFSIZE.
+ * If underscores were added, returned value with underscores comes from
+ * heap, else the returned value is s.
+ */
+
+/**/
+char *
+convbase_underscore(char *s, zlong v, int base, int underscore)
+{
+    char *grouped, *src, *dst;
+    int ndigits, nsep, prefixlen, left;
+
+    /* Plain conversion into s; ndigits counts the digits alone. */
+    convbase_ptr(s, v, base, &ndigits);
+
+    /* No grouping requested, or too few digits to need a separator. */
+    if (underscore <= 0)
+	return s;
+    if (!(nsep = (ndigits - 1) / underscore))
+	return s;
+
+    /* Whatever precedes the digits in s is carried over untouched. */
+    prefixlen = strlen(s) - ndigits;
+    grouped = zhalloc(prefixlen + ndigits + nsep + 1);
+    memcpy(grouped, s, prefixlen);
+    src = s + prefixlen;
+    dst = grouped + prefixlen;
+
+    /*
+     * Copy the digits left to right.  Groups are counted from the
+     * right-hand end, so an underscore follows a digit whenever the
+     * number of digits still to come is a non-zero multiple of underscore.
+     */
+    for (left = ndigits; left; ) {
+	*dst++ = *src++;
+	if (--left && !(left % underscore))
+	    *dst++ = '_';
+    }
+    *dst = '\0';
+
+    return grouped;
+}
+
+/*
  * Convert a floating point value for output.
  * Unlike convbase(), this has its own internal storage and returns
  * a value from the heap.
@@ -4659,6 +4725,83 @@ convfloat(double dval, int digits, int flags, FILE *fout)
     return ret;
 }
 
+/*
+ * Convert a float to a string as convfloat(dval, 0, 0, NULL) does, then
+ * insert an underscore every `underscore' digits for readability.
+ * The integer and fractional digits are grouped separately, both counted
+ * outwards from the decimal point.  If underscore is not positive, or no
+ * underscore is needed, the string from convfloat() is returned as is;
+ * otherwise the result is a new string from zhalloc().
+ */
+
+/**/
+char *convfloat_underscore(double dval, int underscore)
+{
+    int ndigits_int = 0, ndigits_frac = 0, nunderscore, len;
+    char *s, *retptr, *sptr, *dptr;
+
+    s = convfloat(dval, 0, 0, NULL);
+    if (underscore <= 0)
+	return s;
+
+    /* Count the digits before and after the decimal point, if any. */
+    sptr = s;
+    if (*sptr == '-')
+	sptr++;
+    while (idigit(*sptr)) {
+	ndigits_int++;
+	sptr++;
+    }
+    if (*sptr == '.') {
+	sptr++;
+	while (idigit(*sptr)) {
+	    ndigits_frac++;
+	    sptr++;
+	}
+    }
+
+    /*
+     * Work out how many underscores to insert --- remember we
+     * put them in integer and fractional parts separately.
+     * A part with no digits needs none: test for it explicitly, since
+     * (0-1) / underscore is -1 when underscore is 1 (buffer too short).
+     */
+    nunderscore = (ndigits_int ? (ndigits_int-1) / underscore : 0) +
+	(ndigits_frac ? (ndigits_frac-1) / underscore : 0);
+    if (!nunderscore)
+	return s;
+    len = strlen(s);
+    dptr = retptr = zhalloc(len + nunderscore + 1);
+
+    /* Integer part: an underscore goes before each complete group. */
+    sptr = s;
+    if (*sptr == '-')
+	*dptr++ = *sptr++;
+    while (ndigits_int) {
+	*dptr++ = *sptr++;
+	if (--ndigits_int && !(ndigits_int % underscore))
+	    *dptr++ = '_';
+    }
+    if (ndigits_frac) {
+	/* Fractional part: an underscore goes after each complete group. */
+	int mod = 0;
+	/* decimal point, we already checked */
+	*dptr++ = *sptr++;
+	while (ndigits_frac) {
+	    *dptr++ = *sptr++;
+	    mod++;
+	    if (--ndigits_frac && mod == underscore) {
+		*dptr++ = '_';
+		mod = 0;
+	    }
+	}
+    }
+    /* Copy exponent and anything else up to null */
+    while ((*dptr++ = *sptr++))
+	;
+    return retptr;
+}
+
 /* Start a parameter scope */
 
 /**/
diff --git a/Src/subst.c b/Src/subst.c
index 1059508..cc5df3f 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -3754,19 +3754,19 @@ static char *
 arithsubst(char *a, char **bptr, char *rest)
 {
     char *s = *bptr, *t;
-    char buf[BDIGBUFSIZE], *b = buf;
+    char buf[BDIGBUFSIZE], *b;
     mnumber v;
 
     singsub(&a);
     v = matheval(a);
     if ((v.type & MN_FLOAT) && !outputradix)
-	b = convfloat(v.u.d, 0, 0, NULL);
+	b = convfloat_underscore(v.u.d, outputunderscore);
     else {
 	if (v.type & MN_FLOAT)
 	    v.u.l = (zlong) v.u.d;
-	convbase(buf, v.u.l, outputradix);
+	b = convbase_underscore(buf, v.u.l, outputradix, outputunderscore);
     }
-    t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) + 
+    t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) +
 				 strlen(rest) + 1);
     t--;
     while ((*++t = *s++));
diff --git a/Test/C01arith.ztst b/Test/C01arith.ztst
index 7b005c2..25cd8b8 100644
--- a/Test/C01arith.ztst
+++ b/Test/C01arith.ztst
@@ -266,3 +266,19 @@
 >48.5
 >77.5
 >63.5
+
+  underscore_integer() {
+    setopt cbases localoptions
+    print $(( [#_] 1000000 ))	# no base given, default group size
+    print $(( [#16_] 65536 ))	# base 16, default group size
+    print $(( [#16_4] 65536 * 32768 ))	# base 16, groups of four
+  }
+  underscore_integer
+0:Grouping output with underscores: integers
+>1_000_000
+>0x10_000
+>0x8000_0000
+
+  print $(( [#_] (5. ** 10) / 16. ))
+0:Grouping output with underscores: floating point
+>610_351.562_5

pws
Messages sorted by: Reverse Date, Date, Thread, Author