From 1551510993e6c71c8c344cd9377c0b5f95ef7174 Mon Sep 17 00:00:00 2001 From: "Steffen (Daode) Nurpmeso" Date: Mon, 4 Jul 2016 17:26:39 +0200 Subject: [PATCH] Sprinkle #ifdef HAVE_NATCH_CHAR; ensure \x/\u doesn't clash data.. and do support iconv(3) in non-Unicode environments. This is intermediate, but if you look around what you see is not better, most of the time. --- cc-test.sh | 2 +- nailfuns.h | 2 +- shexp.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++--------------- strings.c | 4 +-- 4 files changed, 73 insertions(+), 26 deletions(-) diff --git a/cc-test.sh b/cc-test.sh index cb86097d..1b13e61c 100755 --- a/cc-test.sh +++ b/cc-test.sh @@ -336,7 +336,7 @@ __behave_wysh() { #a b #a b #a - cksum_test behave:wysh_c "${MBOX}" '2932758085 320' + cksum_test behave:wysh_c "${MBOX}" '1473887148 321' ${rm} -f "${BODY}" "${MBOX}" } diff --git a/nailfuns.h b/nailfuns.h index d8f002e0..86a94934 100644 --- a/nailfuns.h +++ b/nailfuns.h @@ -2104,7 +2104,7 @@ FL ui32_t n_utf8_to_utf32(char const **bdat, size_t *blen); #endif /* buf must be large enough also for NUL, it's new length will be returned */ -#ifdef HAVE_FILTER_HTML_TAGSOUP +#if defined HAVE_NATCH_CHAR || defined HAVE_ICONV FL size_t n_utf32_to_utf8(ui32_t c, char *buf); #endif diff --git a/shexp.c b/shexp.c index 853a7380..2e60d426 100644 --- a/shexp.c +++ b/shexp.c @@ -568,6 +568,9 @@ jleave: FL enum n_shexp_state n_shell_parse_token(struct n_string *store, struct str *input, enum n_shexp_parse_flags flags){ +#if defined HAVE_NATCH_CHAR || defined HAVE_ICONV + char utf[8]; +#endif char c2, c, quotec; bool_t skipq, surplus; enum n_shexp_state rv; @@ -744,7 +747,13 @@ jrestart_empty: "%.*s\n"), (int)input->l, input->s); rv |= n_SHEXP_STATE_ERR_NUMBER; --il, ++ib; - goto je_ib_save; + /* Write unchanged */ +je_ib_save: + rv |= n_SHEXP_STATE_OUTPUT; + if(!(flags & n_SHEXP_PARSE_DRYRUN)) + store = n_string_push_buf(store, ib_save, + PTR2SIZE(ib - ib_save)); + continue; } c2 = (c2 << 3) | (c -= '0'); --il, ++ib; @@ -802,7 +811,7 @@ jrestart_empty: } /* Unicode massage */ - if(c2 != 'U' && c2 != 'u'){ + if((c2 != 'U' && c2 != 'u') || n_uasciichar(no)){ if((c = (char)no) == '\0') skipq = TRU1; }else if(no == 0) @@ -817,30 +826,44 @@ jrestart_empty: n_err(_("\"\\U\" argument exceeds 0x10FFFF: " "%.*s\n"), (int)input->l, input->s); rv |= n_SHEXP_STATE_ERR_NUMBER; - goto je_ib_save; - }else if((options & OPT_UNICODE) || - (c2 = n_uasciichar(no))){ - char utf[8]; - - rv |= n_SHEXP_STATE_OUTPUT; - if(!c2) - rv |= n_SHEXP_STATE_UNICODE; - j = n_utf32_to_utf8(no, utf); + /* But normalize the output anyway */ + goto Je_uni_norm; + } + +#if defined HAVE_NATCH_CHAR || defined HAVE_ICONV + j = n_utf32_to_utf8(no, utf); +#endif +#ifdef HAVE_NATCH_CHAR + if(options & OPT_UNICODE){ + rv |= n_SHEXP_STATE_OUTPUT | n_SHEXP_STATE_UNICODE; if(!(flags & n_SHEXP_PARSE_DRYRUN)) store = n_string_push_buf(store, utf, j); - }else{ - if(!(flags & n_SHEXP_PARSE_DRYRUN)) - rv |= n_SHEXP_STATE_ERR_UNICODE; - /* Write unchanged */ -je_ib_save: - rv |= n_SHEXP_STATE_OUTPUT; - if(!(flags & n_SHEXP_PARSE_DRYRUN)) - store = n_string_push_buf(store, ib_save, - PTR2SIZE(ib - ib_save)); continue; } - if(n_uasciichar(no) && cntrlchar(no)) /* TODO ctext */ - rv |= n_SHEXP_STATE_CONTROL; +#endif +#ifdef HAVE_ICONV + /* C99 */{ + char *icp; + + icp = n_iconv_onetime_cp(NULL, NULL, utf, FAL0); + if(icp != NULL){ + rv |= n_SHEXP_STATE_OUTPUT; + if(!(flags & n_SHEXP_PARSE_DRYRUN)) + store = n_string_push_cp(store, icp); + continue; + } + } +#endif + if(!(flags & n_SHEXP_PARSE_DRYRUN)) Je_uni_norm:{ + char itoa[32]; + + rv |= n_SHEXP_STATE_OUTPUT | + n_SHEXP_STATE_ERR_UNICODE; + i = snprintf(itoa, sizeof itoa, "\\%c%0*X", + (no > 0xFFFFu ? 'U' : 'u'), + (int)(no > 0xFFFFu ? 8 : 4), (ui32_t)no); + store = n_string_push_buf(store, itoa, i); + } continue; } if(skipq) @@ -1037,8 +1060,32 @@ n_shell_quote(struct n_string *store, struct str const *input){ store = n_string_push_c(store, c); continue; }else if(c != '\''){ +#ifdef HAVE_NATCH_CHAR + if(options & OPT_UNICODE){ + ui32_t u; + char const *ib2 = &ib[i]; + size_t il2 = il - i, il3 = il2; + + if((u = n_utf8_to_utf32(&ib2, &il2)) != UI32_MAX){ + char itoa[32]; + + il2 = -((siz_t)il2 - (siz_t)il3); + i += --il2; + il3 = snprintf(itoa, sizeof itoa, "%c%0*X", + (u > 0xFFFFu ? 'U' : 'u'), + (int)(u > 0xFFFFu ? 8 : 4), u); + store = n_string_push_buf(store, itoa, il3); + goto juseq; + } + } +#endif store = n_string_push_buf(store, "xFF", sizeof("xFF") -1); n_c_to_hex_base16(&store->s_dat[store->s_len - 2], c); +#ifdef HAVE_NATCH_CHAR +juseq: +#endif + if(i + 1 < il && hexchar(ib[i + 1])) + store = n_string_push_buf(store, "'$'", sizeof("'$'") -1); continue; } } diff --git a/strings.c b/strings.c index ce5d10fb..e3f3da21 100644 --- a/strings.c +++ b/strings.c @@ -810,7 +810,7 @@ jerr: } #endif /* HAVE_NATCH_CHAR */ -#ifdef HAVE_FILTER_HTML_TAGSOUP +#if defined HAVE_NATCH_CHAR || defined HAVE_ICONV FL size_t n_utf32_to_utf8(ui32_t c, char *buf) { @@ -866,7 +866,7 @@ j0: NYD2_LEAVE; return l; } -#endif /* HAVE_FILTER_HTML_TAGSOUP */ +#endif /* HAVE_NATCH_CHAR || HAVE_ICONV */ /* * Our iconv(3) wrapper -- 2.11.4.GIT