Mailing-List: contact zsh-workers-help@zsh.org; run by ezmlm
Precedence: bulk
X-No-Archive: yes
List-Id: Zsh Workers List <zsh-workers.zsh.org>
List-Post: <mailto:zsh-workers@zsh.org>
List-Help: <mailto:zsh-workers-help@zsh.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on f.primenet.com.au
X-Spam-Level: 
X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham
	autolearn_force=no version=3.4.0
X-Biglobe-Sender: <takimoto-j@kba.biglobe.ne.jp>
Content-Type: text/plain; charset=utf-8
Mime-Version: 1.0 (Mac OS X Mail 7.3 \(1878.6\))
Subject: Re: Strange behavior of [[
From: "Jun T." <takimoto-j@kba.biglobe.ne.jp>
In-Reply-To: <5577AE8F.6060902@arthaud.me>
Date: Fri, 8 Jan 2016 22:09:28 +0900
Content-Transfer-Encoding: quoted-printable
Message-Id: <9BF380A3-CAEB-46FB-8598-4E80DF45E79D@kba.biglobe.ne.jp>
References: <5577AE8F.6060902@arthaud.me>
To: zsh-workers@zsh.org
X-Mailer: Apple Mail (2.1878.6)
X-Biglobe-Spnum: 59493
X-Seq: zsh-workers 37515

pcre.c has the same problem:

% setopt re_match_pcre
% [[ $'\ua0' =3D~ . ]] && echo OK
(zsh hangs; 100% CPU usage)

The patch below applies to pcre.c the same fix that workers/35448
made to regex.c. It also adds a simple test to V07pcre.ztst.

diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 2393cd1..aa5c8ed 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -190,18 +190,25 @@ zpcre_get_substrings(char *arg, int *ovec, int =
ret, char *matchvar,
 	if (want_begin_end) {
 	    char *ptr =3D arg;
 	    zlong offs =3D 0;
+	    int clen, leftlen;
=20
 	    /* Count the characters before the match */
-	    MB_METACHARINIT();
-	    while (ptr < arg + ovec[0]) {
+	    MB_CHARINIT();
+	    leftlen =3D ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr +=3D MB_METACHARLEN(ptr);
+		clen =3D MB_CHARLEN(ptr, leftlen);
+		ptr +=3D clen;
+		leftlen -=3D clen;
 	    }
 	    setiparam("MBEGIN", offs + !isset(KSHARRAYS));
 	    /* Add on the characters in the match */
-	    while (ptr < arg + ovec[1]) {
+	    leftlen =3D ovec[1] - ovec[0];
+	    while (leftlen) {
 		offs++;
-		ptr +=3D MB_METACHARLEN(ptr);
+		clen =3D MB_CHARLEN(ptr, leftlen);
+		ptr +=3D clen;
+		leftlen -=3D clen;
 	    }
 	    setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
 	    if (nelem) {
@@ -219,17 +226,23 @@ zpcre_get_substrings(char *arg, int *ovec, int =
ret, char *matchvar,
 		    ptr =3D arg;
 		    offs =3D 0;
 		    /* Find the start offset */
-		    MB_METACHARINIT();
-		    while (ptr < arg + ipair[0]) {
+		    MB_CHARINIT();
+		    leftlen =3D ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr +=3D MB_METACHARLEN(ptr);
+			clen =3D MB_CHARLEN(ptr, leftlen);
+			ptr +=3D clen;
+			leftlen -=3D clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS), 10);
 		    *bptr =3D ztrdup(buf);
 		    /* Continue to the end offset */
-		    while (ptr < arg + ipair[1]) {
+		    leftlen =3D ipair[1] - ipair[0];
+		    while (leftlen) {
 			offs++;
-			ptr +=3D MB_METACHARLEN(ptr);
+			clen =3D MB_CHARLEN(ptr, leftlen);
+			ptr +=3D clen;
+			leftlen -=3D clen;
 		    }
 		    convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
 		    *eptr =3D ztrdup(buf);
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ddfd3f5..3907756 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -37,6 +37,17 @@
 >o=E2=86=92b
 >=E2=86=92
=20
+  unset match mend
+  s=3D$'\u00a0'
+  [[ $s =3D~ '^.$' ]] && print OK
+  [[ A${s}B =3D~ .(.). && $match[1] =3D=3D $s ]] && print OK
+  [[ A${s}${s}B =3D~ A([^[:ascii:]]*)B && $mend[1] =3D=3D 3 ]] && print =
OK
+  unset s
+0:Raw IMETA characters in input string
+>OK
+>OK
+>OK
+
   [[ foo =3D~ f.+ ]] ; print $?
   [[ foo =3D~ x.+ ]] ; print $?
   [[ ! foo =3D~ f.+ ]] ; print $?


