Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

[PATCH] fix failure of D07multibyte on Mac OS X



D07multibyte fails on Mac OS X since the following commit:

commit b237ba0a8eaa5001283ac8448872021723b90aff
Author: Peter Stephenson <pws@xxxxxxx>
Date:   Fri Feb 20 16:25:47 2015 +0000

    34587: ensure multibyte characters don't overflow.

The failure is as follows:

*** 1 ****
! ./test_bad_param:1: command not found: $\M-i#
--- 1 ----
! ./test_bad_param:1: command not found: $?#

The character '?' above is 0xe9.

The problem is not in the commit but in a strange behavior of
isprint() on Mac OS X; under a UTF-8 locale, it returns true for all
the characters in the range from 0xa0 to 0xff. Thus 0xe9 is printed as
is. (It seems isprint(c) is behaving like iswprint(c).)

The problem has been there for a long time; any character which should
be printed as \M-x has been printed as a raw byte (as if PRINT_EIGHT_BIT
is on).

I feel this is a bug in Apple's isprint(), but they may have a
different opinion. Anyway, I think the only possible workaround is to
replace the broken isprint() with an alternative.

In the following patch, most of the code is borrowed from the handling
of BROKEN_WCWIDTH. In order to minimize the possibility of breaking
anything on platforms other than Mac, isprint() is replaced by an
alternative only if isprint() is broken *and* we are building on
Mac OS X, assuming the problem exists only on Mac.

Any comments?

Jun


diff --git a/Src/compat.c b/Src/compat.c
index b0bcb62..21e2a5e 100644
--- a/Src/compat.c
+++ b/Src/compat.c
@@ -951,3 +951,19 @@ int mk_wcswidth_cjk(const wchar_t *pwcs, size_t n)
 /**/
 #endif /* BROKEN_WCWIDTH && (__STDC_ISO_10646__ || __APPLE__) */
 
+/**/
+#if defined(__APPLE__) && defined(BROKEN_ISPRINT)
+
+/**/
+int
+isprint_ascii(int c)
+{
+    char *locale = setlocale(LC_CTYPE, NULL);
+    if (strcasestr(locale, "utf-8") || strcasestr(locale, "utf8"))
+	return (c >= 0x20 && c <= 0x7e);
+    else
+	return isprint(c);
+}
+
+/**/
+#endif /* __APPLE__ && BROKEN_ISPRINT */
diff --git a/Src/pattern.c b/Src/pattern.c
index df5e602..17cd40c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -3622,7 +3622,7 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
 		    return 1;
 		break;
 	    case PP_PRINT:
-		if (isprint(ch))
+		if (ISPRINT(ch))
 		    return 1;
 		break;
 	    case PP_PUNCT:
diff --git a/Src/utils.c b/Src/utils.c
index 1bcceb0..3d12807 100644
--- a/Src/utils.c
+++ b/Src/utils.c
@@ -414,7 +414,7 @@ nicechar(int c)
     static char buf[6];
     char *s = buf;
     c &= 0xff;
-    if (isprint(c))
+    if (ISPRINT(c))
 	goto done;
     if (c & 0x80) {
 	if (isset(PRINTEIGHTBIT))
@@ -423,7 +423,7 @@ nicechar(int c)
 	*s++ = 'M';
 	*s++ = '-';
 	c &= 0x7f;
-	if(isprint(c))
+	if(ISPRINT(c))
 	    goto done;
     }
     if (c == 0x7f) {
diff --git a/Src/ztype.h b/Src/ztype.h
index eef0f23..d1bef0a 100644
--- a/Src/ztype.h
+++ b/Src/ztype.h
@@ -75,3 +75,9 @@
 #define WC_ZISTYPE(X,Y)	zistype((X),(Y))
 #define WC_ISPRINT(X)	isprint(X)
 #endif
+
+#if defined(__APPLE__) && defined(BROKEN_ISPRINT)
+#define ISPRINT(c)  isprint_ascii(c)
+#else
+#define ISPRINT(c)  isprint(c)
+#endif
diff --git a/configure.ac b/configure.ac
index bfc02b2..7e770cd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2567,6 +2567,8 @@ AH_TEMPLATE([MULTIBYTE_SUPPORT],
 [Define to 1 if you want support for multibyte character sets.])
 AH_TEMPLATE([BROKEN_WCWIDTH],
 [Define to 1 if the wcwidth() function is present but broken.])
+AH_TEMPLATE([BROKEN_ISPRINT],
+[Define to 1 if the isprint() function is broken under UTF-8 locale.])
 if test x$zsh_cv_c_unicode_support = xyes; then
   AC_DEFINE(MULTIBYTE_SUPPORT)
 
@@ -2622,6 +2624,38 @@ if test x$zsh_cv_c_unicode_support = xyes; then
   if test x$zsh_cv_c_broken_wcwidth = xyes; then
     AC_DEFINE(BROKEN_WCWIDTH)
   fi
+
+  dnl Check if isprint() behaves correctly under UTF-8 locale.
+  dnl On some platform (maybe only on Mac OS X), isprint() returns
+  dnl true for all characters in the range from 0xa0 to 0xff if
+  dnl called under UTF-8 locale.
+  [locale_prog='char *my_locales[] = {
+  "en_US.UTF-8", "en_GB.UTF-8", "en.UTF-8", '
+  locale_prog="$locale_prog"`locale -a 2>/dev/null | \
+    sed -e 's/utf8/UTF-8/' | grep UTF-8 | \
+    while read line; do echo " \"$line\","; done;`
+  locale_prog="$locale_prog 0 };
+  #include <locale.h>
+  #include <ctype.h>
+
+  int main() {
+    char **localep;
+    for (localep = my_locales; *localep; localep++)
+      if (setlocale(LC_ALL, *localep) && isprint(0xa0))
+	return 0;
+    return 1;
+  }
+  "]
+
+  AC_CACHE_CHECK(if the isprint() function is broken,
+  zsh_cv_c_broken_isprint,
+  [AC_TRY_RUN([$locale_prog],
+  zsh_cv_c_broken_isprint=yes,
+  zsh_cv_c_broken_isprint=no,
+  zsh_cv_c_broken_isprint=no)])
+  if test x$zsh_cv_c_broken_isprint = xyes; then
+    AC_DEFINE(BROKEN_ISPRINT)
+  fi
 fi
 
 dnl





Messages sorted by: Reverse Date, Date, Thread, Author