From efb570f9639647f0a2cf312796b8f85b936b071a Mon Sep 17 00:00:00 2001 From: John Marino Date: Mon, 9 Nov 2015 22:50:27 +0100 Subject: [PATCH] locales: Enforce US-ASCII encoding (limited to 7-bit) The US-ASCII format was getting treated identically to POSIX. It is supposed to throw an ILSEQ errno if a value of 0x80 or greater is encountered, so let's bring back the "ASCII" handling. While here, change nl_codeset to return US-ASCII only when the encoding really is "US-ASCII". Before "C" and "POSIX" encoding returned this string, so now they return "POSIX". --- lib/libc/locale/Makefile.inc | 2 +- lib/libc/locale/ascii.c | 189 ++++++++++++++++++++++++++++++++++++++++ lib/libc/locale/mblocal.h | 1 + lib/libc/locale/nl_langinfo.c | 2 + lib/libc/locale/setrunelocale.c | 4 +- 5 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 lib/libc/locale/ascii.c diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index 1264482203..561e6b60a5 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -5,7 +5,7 @@ CMAPS+= ${.CURDIR}/locale/Symbol.map -SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ +SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ diff --git a/lib/libc/locale/ascii.c b/lib/libc/locale/ascii.c new file mode 100644 index 0000000000..a1c1a2100e --- /dev/null +++ b/lib/libc/locale/ascii.c @@ -0,0 +1,189 @@ +/* + * Copyright 2013 Garrett D'Amore + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Copyright (c) 2011 The FreeBSD Foundation + * All rights reserved. + * Portions of this software were developed by David Chisnall + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)none.c 8.1 (Berkeley) 6/4/93 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mblocal.h" + +static size_t _ascii_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static int _ascii_mbsinit(const mbstate_t *); +static size_t _ascii_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps __unused); +static size_t _ascii_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _ascii_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, + size_t, size_t, mbstate_t * __restrict); + +int +_ascii_init(struct xlocale_ctype *l, _RuneLocale *rl) +{ + + l->__mbrtowc = _ascii_mbrtowc; + l->__mbsinit = _ascii_mbsinit; + l->__mbsnrtowcs = _ascii_mbsnrtowcs; + l->__wcrtomb = _ascii_wcrtomb; + l->__wcsnrtombs = _ascii_wcsnrtombs; + l->runes = rl; + l->__mb_cur_max = 1; + l->__mb_sb_limit = 128; + return(0); +} + +static int +_ascii_mbsinit(const mbstate_t *ps __unused) +{ + /* + * Encoding is not state dependent - we are always in the + * initial state. + */ + return (1); +} + +static size_t +_ascii_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, + mbstate_t * __restrict ps __unused) +{ + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (0); + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + if (pwc != NULL) + *pwc = (unsigned char)*s; + return (*s == '\0' ? 0 : 1); +} + +static size_t +_ascii_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps __unused) +{ + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc < 0 || wc > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + *s = (unsigned char)wc; + return (1); +} + +static size_t +_ascii_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps __unused) +{ + const char *s; + size_t nchr; + + if (dst == NULL) { + s = memchr(*src, '\0', nms); + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + return (s != NULL ? s - *src : nms); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nms-- > 0) { + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = (unsigned char)*s++) == L'\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} + +static size_t +_ascii_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps __unused) +{ + const wchar_t *s; + size_t nchr; + + if (dst == NULL) { + for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { + if (*s < 0 || *s > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + } + return (s - *src); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nwc-- > 0) { + if (*s < 0 || *s > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = *s++) == '\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} diff --git a/lib/libc/locale/mblocal.h b/lib/libc/locale/mblocal.h index f109b5f94d..ad4bb7896f 100644 --- a/lib/libc/locale/mblocal.h +++ b/lib/libc/locale/mblocal.h @@ -80,6 +80,7 @@ int _GB2312_init(struct xlocale_ctype *, _RuneLocale *); int _GBK_init(struct xlocale_ctype *, _RuneLocale *); int _BIG5_init(struct xlocale_ctype *, _RuneLocale *); int _MSKanji_init(struct xlocale_ctype *, _RuneLocale *); +int _ascii_init(struct xlocale_ctype *, _RuneLocale *); typedef size_t (*mbrtowc_pfn_t)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); diff --git a/lib/libc/locale/nl_langinfo.c b/lib/libc/locale/nl_langinfo.c index 87f2175d89..8728d0f720 100644 --- a/lib/libc/locale/nl_langinfo.c +++ b/lib/libc/locale/nl_langinfo.c @@ -71,6 +71,8 @@ nl_langinfo_l(nl_item item, locale_t loc) else if (strcmp(s, "MSKanji") == 0) ret = "SJIS"; else if (strcmp(s, "NONE") == 0) + ret = "POSIX"; + else if (strcmp(s, "NONE:US-ASCII") == 0) ret = "US-ASCII"; else if (strncmp(s, "NONE:", 5) == 0) ret = (char *)(s + 5); diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index 5e28d56b56..596ba1a5f9 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -126,7 +126,9 @@ __setrunelocale(struct xlocale_ctype *l, const char *encoding) rl->__sputrune = NULL; rl->__sgetrune = NULL; - if (strncmp(rl->__encoding, "NONE", 4) == 0) + if (strcmp(rl->__encoding, "NONE:US-ASCII") == 0) + ret = _ascii_init(l, rl); + else if (strncmp(rl->__encoding, "NONE", 4) == 0) ret = _none_init(l, rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(l, rl); -- 2.11.4.GIT