From ab0cd3b8cba25d48f93b065c69b418b134f06417 Mon Sep 17 00:00:00 2001 From: Steffen Nurpmeso Date: Fri, 20 Oct 2017 01:55:43 +0200 Subject: [PATCH] FIX iconv for main body part (since EVER!) (Doug McIlroy, Random832).. On the TUHS list Doug McIlroy responded to a mail of mine and commented on faulty character set conversion artifacts, and it turns out that the codebase has never been truly prepared for faulty character set conversions regarding destination character sets (non-convertible characters). Never. Getting this going before the v15 filter-based rewrite is pretty much hacky insofar as the topmost level should create a filter chain for the exact purpose (send-to-display, -file, -mbox, or create new message) which can then react truly accordingly. This could also involve the user in interactive mode by asking him whether something should be reencoded in the -to-file cases, for example. Not yet, that. --- cc-test.sh | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- mime.c | 49 ++++++++++++++++++++-------------- nail.h | 1 + nailfuns.h | 5 +++- sendout.c | 5 ++-- strings.c | 1 + 6 files changed, 121 insertions(+), 29 deletions(-) diff --git a/cc-test.sh b/cc-test.sh index 6bc085c6..45f535c0 100755 --- a/cc-test.sh +++ b/cc-test.sh @@ -244,11 +244,25 @@ err() { } ex0_test() { - [ $? -ne 0 ] && err $1 'unexpected non-0 exit status' + # $1=test name [$2=status] + __qm__=${?} + [ ${#} -gt 1 ] && __qm__=${2} + if [ ${__qm__} -ne 0 ]; then + err $1 'unexpected non-0 exit status' + else + printf '%s: ok\n' "${1}" + fi } exn0_test() { - [ $? 
-eq 0 ] && err $1 'unexpected 0 exit status' + # $1=test name [$2=status] + __qm__=${?} + [ ${#} -gt 1 ] && __qm__=${2} + if [ ${__qm__} -eq 0 ]; then + err $1 'unexpected 0 exit status' + else + printf '%s: ok\n' "${1}" + fi } if ( [ "$((1 + 1))" = 2 ] ) >/dev/null 2>&1; then @@ -310,6 +324,7 @@ t_behave() { t_behave_mass_recipients t_behave_lreply_futh_rth_etc t_behave_iconv_mbyte_base64 + t_behave_iconv_mainbody } t_behave_X_opt_input_command_stack() { @@ -4161,8 +4176,8 @@ t_behave_iconv_mbyte_base64() { TRAP_EXIT_ADDONS="./.t*" if [ -n "${UTF8_LOCALE}" ] && have_feat iconv && - ( iconv -l | ${grep} -i -e iso-2022-jp -e euc-jp) >/dev/null 2>&1 - then + (/dev/null 2>&1 || + (/dev/null 2>&1; then : else echo 'behave:iconv_mbyte_base64: unsupported, skipped' @@ -4176,7 +4191,7 @@ t_behave_iconv_mbyte_base64() { _EOT chmod 0755 ./.tsendmail.sh - if ( iconv -l | ${grep} -i iso-2022-jp ) >/dev/null 2>&1; then + if (/dev/null 2>&1; then cat <<-'_EOT' | LC_ALL=${UTF8_LOCALE} ${MAILX} ${ARGS} \ -Smta=./.tsendmail.sh \ -Sescape=! -Smime-encoding=base64 2>./.terr @@ -4217,7 +4232,7 @@ t_behave_iconv_mbyte_base64() { echo 'behave:iconv_mbyte_base64: ISO-2022-JP unsupported, skipping 1-4' fi - if ( iconv -l | ${grep} -i euc-jp ) >/dev/null 2>&1; then + if (/dev/null 2>&1; then rm -f "${MBOX}" ./.twrite cat <<-'_EOT' | LC_ALL=${UTF8_LOCALE} ${MAILX} ${ARGS} \ -Smta=./.tsendmail.sh \ @@ -4262,6 +4277,68 @@ t_behave_iconv_mbyte_base64() { t_epilog } +t_behave_iconv_mainbody() { + t_prolog t_behave_iconv_mainbody + TRAP_EXIT_ADDONS="./.t*" + + i= + if have_feat iconv && + (/dev/null 2>&1; then + j="`printf '–' | iconv -f utf-8 -t ascii 2>/dev/null`" + # This assumes iconv(1) behaves like iconv(3), but well + if [ ${?} -ne 0 ]; then + i=1 + elif [ x"${j}" = 'x?' 
]; then + i=2 + elif [ x"${j}" = 'x*' ]; then + i=3 + fi + fi + if [ -z "${i}" ]; then + echo 'behave:iconv_mainbody: unsupported, skipped' + return + fi + + ${cat} <<-_EOT > ./.tsendmail.sh + #!${SHELL} - + (echo 'From HamamelisVirginiana Fri Oct 20 16:23:21 2017' && ${cat} && + echo) >> "${MBOX}" + _EOT + chmod 0755 ./.tsendmail.sh + + printf '–' | ${MAILX} ${ARGS} ${ADDARG_UNI} -Smta=./.tsendmail.sh \ + -S charset-7bit=us-ascii -S charset-8bit=utf-8 \ + -s '–' over-the@rain.bow 2>./.terr + check behave:iconv_mainbody-1 0 "${MBOX}" '3634015017 251' + check behave:iconv_mainbody-2 - ./.terr '4294967295 0' + + printf '–' | ${MAILX} ${ARGS} ${ADDARG_UNI} -Smta=./.tsendmail.sh \ + -S charset-7bit=us-ascii -S charset-8bit=us-ascii \ + -s '–' over-the@rain.bow 2>./.terr + exn0_test behave:iconv_mainbody-3 + check behave:iconv_mainbody-3 - "${MBOX}" '3634015017 251' + check behave:iconv_mainbody-4 - ./.terr '1960148192 128' + + printf 'p\nx\n' | ${MAILX} ${ARGS} -Rf "${MBOX}" >./.tout 2>./.terr + j=${?} + if [ ${i} -eq 1 ]; then + # yuck, just assume ???, we need a test program for that one! 
+ ex0_test behave:iconv_mainbody-5-1 ${j} + check behave:iconv_mainbody-6-1 - ./.tout '1959197095 283' + check behave:iconv_mainbody-7-1 - ./.terr '4294967295 0' + elif [ ${i} -eq 2 ]; then + ex0_test behave:iconv_mainbody-5-2 ${j} + check behave:iconv_mainbody-6-2 - ./.tout '1959197095 283' + check behave:iconv_mainbody-7-2 - ./.terr '4294967295 0' + else + ex0_test behave:iconv_mainbody-5-3 ${j} + check behave:iconv_mainbody-6-3 - ./.tout '3196380198 279' + check behave:iconv_mainbody-7-3 - ./.terr '4294967295 0' + fi + + t_epilog +} + # t_content() # Some basic tests regarding correct sending of mails, via STDIN / -t / -q, # including basic MIME Content-Transfer-Encoding correctness (quoted-printable) diff --git a/mime.c b/mime.c index 5a4d5c0d..680e2ecf 100644 --- a/mime.c +++ b/mime.c @@ -67,7 +67,8 @@ static bool_t _name_highbit(struct name *np); #endif /* fwrite(3) while checking for displayability */ -static ssize_t _fwrite_td(struct str const *input, enum tdflags flags, +static ssize_t _fwrite_td(struct str const *input, + bool_t failiconv, enum tdflags flags, struct str *outrest, struct quoteflt *qf); /* Convert header fields to RFC 2047 format and write to the file fo */ @@ -142,8 +143,8 @@ __mimefwtd_onsig(int sig) /* TODO someday, we won't need it no more */ } static ssize_t -_fwrite_td(struct str const *input, enum tdflags flags, struct str *outrest, - struct quoteflt *qf) +_fwrite_td(struct str const *input, bool_t failiconv, enum tdflags flags, + struct str *outrest, struct quoteflt *qf) { /* TODO note: after send/MIME layer rewrite we will have a string pool * TODO so that memory allocation count drops down massively; for now, @@ -162,6 +163,7 @@ _fwrite_td(struct str const *input, enum tdflags flags, struct str *outrest, struct str in, out; ssize_t rv; NYD_ENTER; + n_UNUSED(failiconv); n_UNUSED(outrest); in = *input; @@ -183,29 +185,36 @@ _fwrite_td(struct str const *input, enum tdflags flags, struct str *outrest, outrest->l = 0; } - if((err = 
n_iconv_str(iconvd, n_ICONV_UNIDEFAULT, &out, &in, &in)) != 0 && - outrest != NULL && in.l > 0){ + rv = 0; + if((err = n_iconv_str(iconvd, + (failiconv ? n_ICONV_NONE : n_ICONV_UNIDEFAULT), + &out, &in, &in)) != 0){ if(err != n_ERR_INVAL) n_iconv_reset(iconvd); - /* Incomplete multibyte at EOF is special xxx _INVAL? */ - if (flags & _TD_EOF) { - out.s = srealloc(out.s, out.l + sizeof(n_unirepl)); - if(n_psonce & n_PSO_UNICODE){ - memcpy(&out.s[out.l], n_unirepl, sizeof(n_unirepl) -1); - out.l += sizeof(n_unirepl) -1; - }else - out.s[out.l++] = '?'; - } else - n_str_add(outrest, &in); + if(outrest != NULL && in.l > 0){ + /* Incomplete multibyte at EOF is special xxx _INVAL? */ + if (flags & _TD_EOF) { + out.s = srealloc(out.s, out.l + sizeof(n_unirepl)); + if(n_psonce & n_PSO_UNICODE){ + memcpy(&out.s[out.l], n_unirepl, sizeof(n_unirepl) -1); + out.l += sizeof(n_unirepl) -1; + }else + out.s[out.l++] = '?'; + } else + n_str_add(outrest, &in); + }else + rv = -1; } in = out; out.l = 0; out.s = NULL; flags &= ~_TD_BUFCOPY; - if (buf != NULL) + if(buf != NULL) free(buf); + if(rv < 0) + goto jleave; }else #endif /* HAVE_ICONV */ /* Else, if we will modify the data bytes and thus introduce the potential @@ -721,7 +730,7 @@ a_mime__convhdra(struct str *inp, FILE *fp, size_t *colp, if(inp->l > 0 && iconvd != (iconv_t)-1){ ciconv.l = 0; - if(n_iconv_str(iconvd, n_ICONV_IGN_NOREVERSE, &ciconv, inp, NULL) != 0){ + if(n_iconv_str(iconvd, n_ICONV_NONE, &ciconv, inp, NULL) != 0){ n_iconv_reset(iconvd); goto jleave; } @@ -1226,7 +1235,7 @@ mime_write(char const *ptr, size_t size, FILE *f, if ((dflags & TD_ICONV) && iconvd != (iconv_t)-1 && (convert == CONV_TOQP || convert == CONV_8BIT || convert == CONV_TOB64 || convert == CONV_TOHDR)) { - if (n_iconv_str(iconvd, n_ICONV_IGN_NOREVERSE, &out, &in, NULL) != 0) { + if (n_iconv_str(iconvd, n_ICONV_NONE, &out, &in, NULL) != 0) { n_iconv_reset(iconvd); /* TODO This causes hard-failure. 
We would need to have an action * TODO policy FAIL|IGNORE|SETERROR(but continue) */ @@ -1299,7 +1308,7 @@ jeb64: jqpb64_dec: if ((sz = out.l) != 0) { ui32_t opl = qf->qf_pfix_len; - sz = _fwrite_td(&out, (dflags & ~_TD_BUFCOPY), outrest, qf); + sz = _fwrite_td(&out, FAL0, (dflags & ~_TD_BUFCOPY), outrest, qf); qf->qf_pfix_len = opl; } break; @@ -1362,7 +1371,7 @@ jqpb64_enc: sz = mime_write_tohdr_a(&in, f, &col); } break; default: - sz = _fwrite_td(&in, dflags, NULL, qf); + sz = _fwrite_td(&in, TRU1, dflags, NULL, qf); break; } diff --git a/nail.h b/nail.h index 40b3c0d4..0310426a 100644 --- a/nail.h +++ b/nail.h @@ -2572,6 +2572,7 @@ VL ui32_t n_pstate; /* Bits of enum n_program_state */ VL si32_t n_pstate_err_no; /* What backs $! n_ERR_* TODO ..HACK */ VL si32_t n_pstate_ex_no; /* What backs $? n_EX_* TODO ..HACK ->64-bit */ #define n_err_no errno /* Don't use errno directly, for later XXX */ +VL si32_t n_iconv_err_no; /* TODO HACK: part of CTX to not get lost */ /* XXX stylish sorting */ VL int msgCount; /* Count of messages read in */ diff --git a/nailfuns.h b/nailfuns.h index 3a3eb91b..23ec3aa6 100644 --- a/nailfuns.h +++ b/nailfuns.h @@ -2410,7 +2410,10 @@ FL void n_iconv_reset(iconv_t cd); * iconv_str() auto-grows on ERR_2BIG errors; in and in_rest_or_null may be * the same object. * Note: ERR_INVAL (incomplete sequence at end of input) is NOT handled, so the - * replacement character must be added manually if that happens at EOF! */ + * replacement character must be added manually if that happens at EOF! + * TODO These must be contexts. For now we duplicate n_err_no into + * TODO n_iconv_err_no in order to be able to access it when stuff happens + * TODO "in between"! 
*/ FL int n_iconv_buf(iconv_t cd, enum n_iconv_flags icf, char const **inb, size_t *inbleft, char **outb, size_t *outbleft); diff --git a/sendout.c b/sendout.c index 16128842..b011c417 100644 --- a/sendout.c +++ b/sendout.c @@ -1865,12 +1865,13 @@ mail1(struct header *hp, int printheaders, struct message *quote, ; else if ((nmtf = infix(hp, mtf)) != NULL) break; - else if ((err = n_err_no) == n_ERR_ILSEQ || err == n_ERR_INVAL) { + else if ((err = n_iconv_err_no) == n_ERR_ILSEQ || err == n_ERR_INVAL || + err == n_ERR_NOENT) { rewind(mtf); continue; } - n_perr(_("Failed to create encoded message"), 0); + n_perr(_("Failed to create encoded message"), err); n_pstate_err_no = n_ERR_NOTSUP; goto jfail_dead; } diff --git a/strings.c b/strings.c index 19aa1b14..453329c4 100644 --- a/strings.c +++ b/strings.c @@ -1275,6 +1275,7 @@ n_iconv_buf(iconv_t cd, enum n_iconv_flags icf, } err = 0; jleave: + n_iconv_err_no = err; NYD2_LEAVE; return err; } -- 2.11.4.GIT