diff options
Diffstat (limited to 'lib/sh')
| -rw-r--r-- | lib/sh/strtrans.c | 84 |
1 files changed, 43 insertions, 41 deletions
diff --git a/lib/sh/strtrans.c b/lib/sh/strtrans.c index af75dcfa..e3d67b6a 100644 --- a/lib/sh/strtrans.c +++ b/lib/sh/strtrans.c @@ -55,7 +55,7 @@ ansicstr (const char *string, size_t len, int flags, int *sawc, size_t *rlen) const char *s; unsigned long v; size_t clen; - int mb_cur_max; + size_t mb_cur_max; #if defined (HANDLE_MULTIBYTE) wchar_t wc; #endif @@ -63,7 +63,7 @@ ansicstr (const char *string, size_t len, int flags, int *sawc, size_t *rlen) if (string == 0 || *string == '\0') return ((char *)0); - mb_cur_max = MB_CUR_MAX; + mb_cur_max = locale_mb_cur_max; #if defined (HANDLE_MULTIBYTE) temp = 4*len + 4; if (temp < 12) @@ -79,10 +79,14 @@ ansicstr (const char *string, size_t len, int flags, int *sawc, size_t *rlen) { clen = 1; #if defined (HANDLE_MULTIBYTE) - if ((locale_utf8locale && (c & 0x80)) || - (locale_utf8locale == 0 && mb_cur_max > 0 && is_basic (c) == 0)) + /* We read an entire multibyte character at a time if we are in a + locale where a backslash can possibly appear as part of a + multibyte character. UTF-8 encodings prohibit this. */ + if (locale_utf8locale == 0 && mb_cur_max > 1 && is_basic (c) == 0) { clen = mbrtowc (&wc, s - 1, mb_cur_max, 0); + if (MB_NULLWCH (clen)) + break; /* it apparently can happen */ if (MB_INVALIDCH (clen)) clen = 1; } @@ -227,30 +231,24 @@ ansic_quote (const char *str, int flags, int *rlen) { char *r, *ret; const char *s; - size_t l, rsize; unsigned char c; +#if defined (HANDLE_MULTIBYTE) size_t clen; int b; -#if defined (HANDLE_MULTIBYTE) wchar_t wc; + DECLARE_MBSTATE; #endif if (str == 0 || *str == 0) return ((char *)0); - l = strlen (str); - rsize = 4 * l + 4; - r = ret = (char *)xmalloc (rsize); + r = ret = (char *)xmalloc (4 * strlen (str) + 4); *r++ = '$'; *r++ = '\''; for (s = str; c = *s; s++) { - b = 1; /* 1 == add backslash; 0 == no backslash */ - l = 1; - clen = 1; - switch (c) { case ESC: c = 'E'; break; @@ -266,39 +264,42 @@ ansic_quote (const char *str, int flags, int *rlen) break; default: #if defined (HANDLE_MULTIBYTE) - b = is_basic (c); - /* XXX - clen comparison to 0 is dicey */ - if ((b == 0 && ((clen = mbrtowc (&wc, s, MB_CUR_MAX, 0)) < 0 || MB_INVALIDCH (clen) || iswprint (wc) == 0)) || - (b == 1 && ISPRINT (c) == 0)) -#else - if (ISPRINT (c) == 0) -#endif + if ((locale_utf8locale && (c & 0x80)) || + (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0)) { - *r++ = '\\'; - *r++ = TOCHAR ((c >> 6) & 07); - *r++ = TOCHAR ((c >> 3) & 07); - *r++ = TOCHAR (c & 07); - continue; + clen = mbrtowc (&wc, s, locale_mb_cur_max, &state); + if (MB_NULLWCH (clen)) + goto quote_end; + if (MB_INVALIDCH (clen)) + INITIALIZE_MBSTATE; + else if (iswprint (wc)) + { + for (b = 0; b < (int)clen; b++) + *r++ = (unsigned char)s[b]; + s += clen - 1; /* -1 because of the increment above */ + continue; + } } - l = 0; - break; + else +#endif + if (ISPRINT (c)) + { + *r++ = c; + continue; + } + + *r++ = '\\'; + *r++ = TOCHAR ((c >> 6) & 07); + *r++ = TOCHAR ((c >> 3) & 07); + *r++ = TOCHAR (c & 07); + continue; } - if (b == 0 && clen == 0) - break; - if (l) - *r++ = '\\'; - - if (clen == 1) - *r++ = c; - else - { - for (b = 0; b < (int)clen; b++) - *r++ = (unsigned char)s[b]; - s += clen - 1; /* -1 because of the increment above */ - } + *r++ = '\\'; + *r++ = c; } +quote_end: *r++ = '\''; *r = '\0'; if (rlen) @@ -348,7 +349,8 @@ ansic_shouldquote (const char *string) for (s = string; c = *s; s++) { #if defined (HANDLE_MULTIBYTE) - if (is_basic (c) == 0) + if ((locale_utf8locale && (c & 0x80)) || + (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0)) return (ansic_wshouldquote (s)); #endif if (ISPRINT (c) == 0) |
