Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: improve ${(q)...}



It's annoyed me for a while now that single-quoting a value uses
single quotes even where they're not needed, and with existing single
quotes in the value at the start or end you get something quite messy.

This fixes the problem.  Is there a good reason for making this a
different flag, or is altering (q) good enough?  I couldn't think of a
reason why you would need it to be verbose, i.e. why you need

% v=foo
% print ${(qq)v}
'foo'

rather than just

foo

although I can think of arguments the other way.  We've never made any
guarantee over the final form of the quoting, just that it protects it
in the way the shell expects.

Note that the new form takes care that you still get the quotes at the
start and end if quotes are necessary (up to subtleties with embedded
single quotes which are also supposed to be gracefully handled).
This could be optimised in the simple case (no \' at the start), but
that can wait.

I won't commit this before 4.3.10.  New tests needed, too.

Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.105
diff -u -r1.105 expn.yo
--- Doc/Zsh/expn.yo	23 Mar 2009 12:17:33 -0000	1.105
+++ Doc/Zsh/expn.yo	28 Apr 2009 16:21:12 -0000
@@ -830,9 +830,13 @@
 quotes for each octet.  If this flag is given
 twice, the resulting words are quoted in single quotes and if it is
 given three times, the words are quoted in double quotes; in these forms
-no special handling of unprintable or invalid characters is attempted.  If
-the flag is given four times, the words are quoted in single quotes
+no special handling of unprintable or invalid characters is attempted.
+If the flag is given four times, the words are quoted in single quotes
 preceded by a tt($).
+
+If the context does not require a particular form of quoting, use
+of single quotes (tt(qq)) is recommended as this takes care to
+emit the minimum number of quotes required.
 )
 item(tt(Q))(
 Remove one level of quotes from the resulting words.
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.97
diff -u -r1.97 subst.c
--- Src/subst.c	23 Mar 2009 12:17:33 -0000	1.97
+++ Src/subst.c	28 Apr 2009 16:21:12 -0000
@@ -2800,8 +2800,29 @@
      * the repetitions of the (q) flag.
      */
     if (quotemod) {
+	int pre = 0, post = 0;
+
 	if (quotetype > QT_DOLLARS)
 	    quotetype = QT_DOLLARS;
+	if (quotemod > 0 && quotetype > QT_BACKSLASH) {
+	    switch (quotetype)
+	    {
+	    case QT_DOLLARS:
+		/* space for "$" */
+		pre = 2;
+		post = 1;
+		break;
+
+	    case QT_SINGLE:
+		quotetype = QT_SINGLE_OPTIONAL;
+		/* quotes will be added for us */
+		break;
+
+	    default:
+		pre = post = 1;
+		break;
+	    }
+	}
 	if (isarr) {
 	    char **ap;
 
@@ -2815,13 +2836,13 @@
 		    char *tmp;
 
 		    for (; *ap; ap++) {
-			int pre = quotetype != QT_DOLLARS ? 1 : 2;
 			tmp = quotestring(*ap, NULL, quotetype);
 			sl = strlen(tmp);
-			*ap = (char *) zhalloc(pre + sl + 2);
+			*ap = (char *) zhalloc(pre + sl + post + 1);
 			strcpy((*ap) + pre, tmp);
-			ap[0][pre - 1] = ap[0][pre + sl] =
-			    (quotetype != QT_DOUBLE ? '\'' : '"');
+			if (pre)
+			    ap[0][pre - 1] = ap[0][pre + sl] =
+				(quotetype != QT_DOUBLE ? '\'' : '"');
 			ap[0][pre + sl + 1] = '\0';
 			if (quotetype == QT_DOLLARS)
 			  ap[0][0] = '$';
@@ -2852,15 +2873,15 @@
 		val = dupstring(val), copied = 1;
 	    if (quotemod > 0) {
 		if (quotetype > QT_BACKSLASH) {
-		    int pre = quotetype != QT_DOLLARS ? 1 : 2;
 		    int sl;
 		    char *tmp;
 		    tmp = quotestring(val, NULL, quotetype);
 		    sl = strlen(tmp);
 		    val = (char *) zhalloc(pre + sl + 2);
 		    strcpy(val + pre, tmp);
-		    val[pre - 1] = val[pre + sl] =
-			(quotetype != QT_DOUBLE ? '\'' : '"');
+		    if (pre)
+			val[pre - 1] = val[pre + sl] =
+			    (quotetype != QT_DOUBLE ? '\'' : '"');
 		    val[pre + sl + 1] = '\0';
 		    if (quotetype == QT_DOLLARS)
 		      val[0] = '$';
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.221
diff -u -r1.221 utils.c
--- Src/utils.c	6 Apr 2009 09:06:36 -0000	1.221
+++ Src/utils.c	28 Apr 2009 16:21:12 -0000
@@ -4401,6 +4401,11 @@
  * 
  * The last argument is a QT_ value defined in zsh.h other than QT_NONE.
  *
+ * Most quote styles other than backslash assume the quotes are to
+ * be added outside quotestring().  QT_SINGLE_OPTIONAL is different:
+ * the single quotes are only added where necessary, so the
+ * whole expression is handled here.
+ *
  * The string may be metafied and contain tokens.
  */
 
@@ -4410,20 +4415,50 @@
 {
     const char *u, *tt;
     char *v;
+    int alloclen;
+    char *buf;
+    int sf = 0;
     /*
-     * With QT_BACKSLASH we may need to use $'\300' stuff.
-     * Keep memory usage within limits by allocating temporary
-     * storage and using heap for correct size at end.
+     * quotesub is used with QT_SINGLE_OPTIONAL.
+     * quotesub = 0:  mechanism not active
+     * quotesub = 1:  mechanism pending, no "'" yet;
+     *                needs adding at quotestart.
+     * quotesub = 2:  mechanism active, added opening "'"; need
+     *                closing "'".
      */
-    int alloclen = (instring == QT_BACKSLASH ? 7 : 4) * strlen(s) + 1;
-    char *buf = zshcalloc(alloclen);
-    int sf = 0;
+    int quotesub = 0;
+    char *quotestart;
     convchar_t cc;
     const char *uend;
 
-    DPUTS(instring < QT_BACKSLASH || instring > QT_DOLLARS,
+    switch (instring)
+    {
+    case QT_BACKSLASH:
+	/*
+	 * With QT_BACKSLASH we may need to use $'\300' stuff.
+	 * Keep memory usage within limits by allocating temporary
+	 * storage and using heap for correct size at end.
+	 */
+	alloclen = strlen(s) * 7 + 1;
+	break;
+
+    case QT_SINGLE_OPTIONAL:
+	/*
+	 * Here, we may need to add single quotes.
+	 */
+	alloclen = strlen(s) * 4 + 3;
+	quotesub = 1;
+	break;
+
+    default:
+	alloclen = strlen(s) * 4 + 1;
+	break;
+    }
+    tt = quotestart = v = buf = zshcalloc(alloclen);
+
+    DPUTS(instring < QT_BACKSLASH || instring == QT_BACKTICK ||
+	  instring > QT_SINGLE_OPTIONAL,
 	  "BUG: bad quote type in quotestring");
-    tt = v = buf;
     u = s;
     if (instring == QT_DOLLARS) {
 	/*
@@ -4519,12 +4554,64 @@
 		       (u[-1] == '=' || u[-1] == ':')) ||
 		      (*u == '~' && isset(EXTENDEDGLOB))) &&
 		     (instring == QT_BACKSLASH ||
+		      instring == QT_SINGLE_OPTIONAL ||
 		      (isset(BANGHIST) && *u == (char)bangchar &&
 		       instring != QT_SINGLE) ||
 		      (instring == QT_DOUBLE &&
 		       (*u == '$' || *u == '`' || *u == '\"' || *u == '\\')) ||
 		      (instring == QT_SINGLE && *u == '\''))) {
-		if (*u == '\n' || (instring == QT_SINGLE && *u == '\'')) {
+		if (instring == QT_SINGLE_OPTIONAL) {
+		    if (quotesub == 1) {
+			/*
+			 * We haven't yet had to quote at the start.
+			 */
+			if (*u == '\'') {
+			    /*
+			     * We don't need to.
+			     */
+			    *v++ = '\\';
+			} else {
+			    /*
+			     * It's now time to add quotes.
+			     */
+			    if (v > quotestart)
+			    {
+				char *addq;
+
+				for (addq = v; addq > quotestart; addq--)
+				    *addq = addq[-1];
+			    }
+			    *quotestart = '\'';
+			    v++;
+			    quotesub = 2;
+			}
+			*v++ = *u++;
+			/*
+			 * Next place to start quotes is here.
+			 */
+			quotestart = v;
+		    } else if (*u == '\'') {
+			if (unset(RCQUOTES)) {
+			    *v++ = '\'';
+			    *v++ = '\\';
+			    *v++ = '\'';
+			    /* Don't restart quotes unless we need them */
+			    quotesub = 1;
+			    quotestart = v;
+			} else {
+			    /* simplest just to use '' always */
+			    *v++ = '\'';
+			    *v++ = '\'';
+			}
+			/* dealt with */
+			u++;
+		    } else {
+			/* else already quoting, just add */
+			*v++ = *u++;
+		    }
+		    continue;
+		} else if (*u == '\n' ||
+			   (instring == QT_SINGLE && *u == '\'')) {
 		    if (unset(RCQUOTES)) {
 			*v++ = '\'';
 			if (*u == '\'')
@@ -4582,6 +4669,8 @@
 	    }
 	}
     }
+    if (quotesub == 2)
+	*v++ = '\'';
     *v = '\0';
 
     if (e && *e == u)
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.155
diff -u -r1.155 zsh.h
--- Src/zsh.h	24 Mar 2009 12:52:08 -0000	1.155
+++ Src/zsh.h	28 Apr 2009 16:21:13 -0000
@@ -210,8 +210,15 @@
      * in those cases where we need to represent a complete set.
      */
     QT_BACKTICK,
+    /*
+     * Single quotes, but the default is not to quote unless necessary.
+     * This is only useful as an argument to quotestring().
+     */
+    QT_SINGLE_OPTIONAL
 };
 
+#define QT_IS_SINGLE(x)	((x) == QT_SINGLE || (x) == QT_SINGLE_OPTIONAL)
+
 /*
  * Lexical tokens: unlike the character tokens above, these never
  * appear in strings and don't necessarily represent a single character.
Index: Test/D04parameter.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/D04parameter.ztst,v
retrieving revision 1.36
diff -u -r1.36 D04parameter.ztst
--- Test/D04parameter.ztst	9 Oct 2008 13:46:46 -0000	1.36
+++ Test/D04parameter.ztst	28 Apr 2009 16:21:13 -0000
@@ -321,7 +321,7 @@
   print -r ${(qqqq)foo}
 0:${(q...)...}
 >playing\ \'stupid\'\ \"games\"\ \\w\\i\\t\\h\ \$quoting.
->'playing '\''stupid'\'' "games" \w\i\t\h $quoting.'
+>'playing '\'stupid\'' "games" \w\i\t\h $quoting.'
 >"playing 'stupid' \"games\" \\w\\i\\t\\h \$quoting."
 >$'playing \'stupid\' "games" \\w\\i\\t\\h $quoting.'
 



-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070



Messages sorted by: Reverse Date, Date, Thread, Author