Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: POSIX_STRINGS option



There's been some POSIX-directed discussion in the Austin group that a
null (NUL) character in a $'...' style string should terminate the string at
that point.  This isn't in the standard yet, but bash already does it
this way, so I might as well add this for compatibility now.
(Apparently ksh currently truncates the entire string, not just the
quoted string, at that point, but is likely to change.)

I've assumed this will be a POSIX requirement and named it so that
anything similar that comes up can be associated with this option.

I can't think of a good reason for setting the option deliberately; it
just makes \0 within $'...' essentially useless.

Index: Doc/Zsh/options.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/options.yo,v
retrieving revision 1.96
diff -p -u -r1.96 options.yo
--- Doc/Zsh/options.yo	2 Oct 2010 21:03:04 -0000	1.96
+++ Doc/Zsh/options.yo	19 Nov 2010 18:01:08 -0000
@@ -1891,6 +1891,33 @@ If multibyte character support is not co
 ignored; all octets with the top bit set may be used in identifiers.
 This is non-standard but is the traditional zsh behaviour.
 )
+pindex(POSIX_STRINGS)
+pindex(NO_POSIX_STRINGS)
+pindex(POSIXSTRINGS)
+pindex(NOPOSIXSTRINGS)
+cindex(discarding embedded nulls in $'...')
+cindex(embedded nulls, in $'...')
+cindex(nulls, embedded in $'...')
+item(tt(POSIX_STRINGS) <K> <S>)(
+This option affects processing of quoted strings.  Currently it only
+affects the behaviour of null characters, i.e. character 0 in the
+portable character set corresponding to US ASCII.
+
+When this option is not set, null characters embedded within strings
+of the form tt($')var(...)tt(') are treated as ordinary characters. The
+entire string is maintained within the shell and output to files where
+necessary, although owing to restrictions of the library interface
+the string is truncated at the null character in file names, environment
+variables, or in arguments to external programs.
+
+When this option is set, the tt($')var(...)tt(') expression is truncated at
+the null character.  Note that remaining parts of the same string
+beyond the termination of the quotes are not truncated.
+
+For example, the command line argument tt(a$'b\0c'd) is treated with
+the option off as the characters tt(a), tt(b), null, tt(c), tt(d),
+and with the option on as the characters tt(a), tt(b), tt(d).
+)
 pindex(POSIX_TRAPS)
 pindex(NO_POSIX_TRAPS)
 pindex(POSIXTRAPS)
Index: Src/options.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/options.c,v
retrieving revision 1.56
diff -p -u -r1.56 options.c
--- Src/options.c	2 Oct 2010 21:03:04 -0000	1.56
+++ Src/options.c	19 Nov 2010 18:01:08 -0000
@@ -205,7 +205,8 @@ static struct optname optns[] = {
 {{NULL, "posixcd",            OPT_EMULATE|OPT_BOURNE},	 POSIXCD},
 {{NULL, "posixidentifiers",   OPT_EMULATE|OPT_BOURNE},	 POSIXIDENTIFIERS},
 {{NULL, "posixjobs",          OPT_EMULATE|OPT_BOURNE},	 POSIXJOBS},
-{{NULL, "posixtraps",          OPT_EMULATE|OPT_BOURNE},	 POSIXTRAPS},
+{{NULL, "posixstrings",       OPT_EMULATE|OPT_BOURNE},   POSIXSTRINGS},
+{{NULL, "posixtraps",         OPT_EMULATE|OPT_BOURNE},	 POSIXTRAPS},
 {{NULL, "printeightbit",      0},                        PRINTEIGHTBIT},
 {{NULL, "printexitvalue",     0},			 PRINTEXITVALUE},
 {{NULL, "privileged",	      OPT_SPECIAL},		 PRIVILEGED},
Index: Src/utils.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/utils.c,v
retrieving revision 1.246
diff -p -u -r1.246 utils.c
--- Src/utils.c	15 Oct 2010 18:56:17 -0000	1.246
+++ Src/utils.c	19 Nov 2010 18:01:09 -0000
@@ -5200,7 +5200,7 @@ getkeystring(char *s, int *len, int how,
     char *buf, tmp[1];
     char *t, *tdest = NULL, *u = NULL, *sstart = s, *tbuf = NULL;
     char svchar = '\0';
-    int meta = 0, control = 0;
+    int meta = 0, control = 0, ignoring = 0;
     int i;
 #if defined(HAVE_WCHAR_H) && defined(HAVE_WCTOMB) && defined(__STDC_ISO_10646__)
     wint_t wval;
@@ -5623,11 +5623,22 @@ getkeystring(char *s, int *len, int how,
 	if (how & GETKEY_DOLLAR_QUOTE) {
 	    char *t2;
 	    for (t2 = tbuf; t2 < t; t2++) {
+		/*
+		 * In POSIX mode, an embedded NULL is discarded and
+		 * terminates processing.  It just does, that's why.
+		 */
+		if (isset(POSIXSTRINGS)) {
+		    if (*t2 == '\0')
+			ignoring = 1;
+		    if (ignoring)
+			break;
+		}
 		if (imeta(*t2)) {
 		    *tdest++ = Meta;
 		    *tdest++ = *t2 ^ 32;
-		} else
+		} else {
 		    *tdest++ = *t2;
+		}
 	    }
 	    /*
 	     * Reset use of temporary buffer.
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.169
diff -p -u -r1.169 zsh.h
--- Src/zsh.h	2 Oct 2010 21:03:04 -0000	1.169
+++ Src/zsh.h	19 Nov 2010 18:01:09 -0000
@@ -1992,6 +1992,7 @@ enum {
     POSIXCD,
     POSIXIDENTIFIERS,
     POSIXJOBS,
+    POSIXSTRINGS,
     POSIXTRAPS,
     PRINTEIGHTBIT,
     PRINTEXITVALUE,
Index: Test/A03quoting.ztst
===================================================================
RCS file: /cvsroot/zsh/zsh/Test/A03quoting.ztst,v
retrieving revision 1.4
diff -p -u -r1.4 A03quoting.ztst
--- Test/A03quoting.ztst	6 Nov 2007 20:45:09 -0000	1.4
+++ Test/A03quoting.ztst	19 Nov 2010 18:01:09 -0000
@@ -42,6 +42,7 @@
   unsetopt rcquotes
 0:Yes RC_QUOTES with single quotes
 >'
+# ' Deconfuse Emacs quoting rules
 
   print '<\u0041>'
   printf '%s\n' $'<\u0042>'
@@ -52,3 +53,24 @@
 ><B>
 ><C>
 ><D>
+
+  null1="$(print -r a$'b\0c'd)"
+  null2="$(setopt posixstrings; print -r a$'b\0c'd)"
+  for string in $null1 $null2; do
+    print ":"
+    for (( i = 1; i <= $#string; i++ )); do
+      char=$string[$i]
+      print $(( [#16] #char ))
+    done
+  done
+0:Embedded null characters in $'...' strings.
+>:
+>16#61
+>16#62
+>16#0
+>16#63
+>16#64
+>:
+>16#61
+>16#62
+>16#64

-- 
Peter Stephenson <pws@xxxxxxx>            Software Engineer
Tel: +44 (0)1223 692070                   Cambridge Silicon Radio Limited
Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, UK


Member of the CSR plc group of companies. CSR plc registered in England and Wales, registered number 4187346, registered office Churchill House, Cambridge Business Park, Cowley Road, Cambridge, CB4 0WZ, United Kingdom



Messages sorted by: Reverse Date, Date, Thread, Author