Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

Re: a='foo"'; echo ${a/foo"/"bar} outputs bar



On Sun, Aug 6, 2023 at 7:36 PM Bart Schaefer <schaefer@xxxxxxxxxxxxxxxx> wrote:
>
> Circling back to this ...

... and again ...

> With an increased understanding of lex.c gained by stepping through
> variations of ${|...} a thousand times, I think I have identified two
> possible ways to address this.

Turns out there's a fundamental problem with ${var/pat/repl}, namely
that upon reaching paramsubst() the only recognized quoting of the
slash character is to precede it with a backslash. Conversely,
gettokstr() recognizes '...', "...", and $'...' quoting, and may
return STRING tokens containing backslashes that paramsubst() won't be
able to deal with.

Other things that have never worked are e.g. ${a/${b/c/d}/e}, which
gives bad substitution, and ${a/"${b/c/d}"/e} which AFAICT always
substitutes ${a} with no replacements, and ${a/"$( /bin/echo d )"/e}
which is bad pattern: "$(.  Fixing this properly would require doing a
full-fledged parse inside paramsubst(), rather than just a search for
unquoted backslashes.

Several of my attempts so far have caused at least Test/Y01 to fail in
comptesteval, so there is evidence for dependency on the existing
behavior.

> Something similar would have to be done with single quotes

I had an idea here that seems to work: while reading single-quoted
strings in gettokstr(), recognize when we're in a pattern and replace
all / with \/.  That only fixes '...' and $'...' at best, though.

So, back to workers/51202, my original reply on this thread:  handle
double-quotes in paramsubst(), leaving single quotes to gettokstr().

This still isn't perfect (see above regarding ${a/${b/c/d}/e}) but it
fixes the two Xfail tests in D04parameter and handles most of the
straightforward cases, including

a='/x/y/z' zsh -fc 'echo "${a/"/y/"/+}"'

and even the ${a/"${b/c/d}"/e} example, although it'll break on any
more double quotes inside other double quotes.

Other existing tests still pass.  Let me know what you think.
diff --git a/Src/lex.c b/Src/lex.c
index 33b17cc95..31b130b07 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -938,7 +938,7 @@ gettokstr(int c, int sub)
 {
     int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
     int intpos = 1, in_brace_param = 0, cmdsubst = 0;
-    int inquote, unmatched = 0;
+    int inquote, unmatched = 0, in_pattern = 0;
     enum lextok peek;
 #ifdef DEBUG
     int ocmdsp = cmdsp;
@@ -1160,7 +1160,7 @@ gettokstr(int c, int sub)
 	    if (bct-- == in_brace_param) {
 		if (cmdsubst)
 		    cmdpop();
-		in_brace_param = cmdsubst = 0;
+		in_brace_param = cmdsubst = in_pattern = 0;
 	    }
 	    c = Outbrace;
 	    break;
@@ -1309,7 +1309,8 @@ gettokstr(int c, int sub)
 			    lexbuf.ptr--, lexbuf.len--;
 			else
 			    break;
-		    }
+		    } else if (in_pattern && c == '/')
+			add(Bnull);
 		    add(c);
 		}
 		ALLOWHIST
@@ -1397,26 +1398,36 @@ gettokstr(int c, int sub)
 	     */
 	    c = Dash;
            break;
-       case LX2_BANG:
-           /*
-            * Same logic as Dash, for ! to perform negation in range.
-            */
-           if (seen_brct)
-               c = Bang;
-           else
-               c = '!';
-       }
-       add(c);
-       c = hgetc();
-       if (intpos)
+	case LX2_BANG:
+	    /*
+	     * Same logic as Dash, for ! to perform negation in range.
+	     */
+	    if (seen_brct)
+		c = Bang;
+	    else
+		c = '!';
+	case LX2_OTHER:
+	    if (in_brace_param) {
+		if (c == '/') {
+		    if (in_pattern == 0)
+			in_pattern = 2;
+		    else
+			--in_pattern;
+		}
+	    }
+	}
+	add(c);
+	c = hgetc();
+	if (intpos)
 	    intpos--;
-       if (lexstop)
+	if (lexstop)
 	    break;
-       if (!cmdsubst && in_brace_param && act == LX2_STRING &&
-	   (c == '|' || c == Bar || inblank(c))) {
-	   cmdsubst = in_brace_param;
-	   cmdpush(CS_CURSH);
-       }
+	if (!cmdsubst && in_brace_param && act == LX2_STRING &&
+	    (c == '|' || c == Bar || inblank(c))) {
+	    cmdsubst = in_brace_param;
+	    cmdpush(CS_CURSH);
+	} else if (in_pattern == 2 && c != '/')
+	    in_pattern = 1;
     }
   brk:
     if (errflag) {
diff --git a/Src/subst.c b/Src/subst.c
index cdbfc138a..419069f5a 100644
--- a/Src/subst.c
+++ b/Src/subst.c
@@ -3088,6 +3088,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
 			chuck(ptr);
 		    else
 			ptr++;
+		} else if (c == Dnull) {
+		    chuck(ptr);
+		    while (*ptr && *ptr != c)
+			ptr++;
+		    if (*ptr == Dnull)
+			chuck(ptr);
+		    ptr--;	/* Outer loop is about to increment */
 		}
 	    }
 	    replstr = (*ptr && ptr[1]) ? ptr+1 : "";
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index c2008582c..69a4fd3ec 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -2762,13 +2762,13 @@ F:behavior, see http://austingroupbugs.net/view.php?id=888
 
  slash='/'
  print -r -- x${slash/'/'}y
--Df:(users/28784) substituting a single-quoted backslash, part #1: slash
+0:(users/28784) substituting a single-quoted backslash, part #1: slash
 >xy
 
  single_quote="'"
- print -r -- x${single_quote/'/'}y
--Df:(users/28784) substituting a single-quoted backslash, part #2: single quote
->x/y
+ print -r -- x${single_quote/$'/'}y
+0:(users/28784) substituting a single-quoted backslash, part #2: single quote
+>x'y
 
  control="foobar"
  print -r -- x${control/'bar'}y


Messages sorted by: Reverse Date, Date, Thread, Author