From 832fafac03d1da5003d349606114455e282d3aa3 Mon Sep 17 00:00:00 2001 From: "Steffen \"Daode\" Nurpmeso" Date: Thu, 18 Oct 2012 21:08:39 +0200 Subject: [PATCH] Add IDNA support.. This changeset is one of the key commits for S-nail v13. It transparently adds encoding support for IDNA. --- catd/en_US | 5 +- def.h | 49 ++++++++------- extern.h | 2 +- head.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++-------------- names.c | 51 +++++++++++++--- 5 files changed, 228 insertions(+), 78 deletions(-) diff --git a/catd/en_US b/catd/en_US index 78a1b1cf..fcdb0f3f 100644 --- a/catd/en_US +++ b/catd/en_US @@ -192,8 +192,8 @@ The following ~ escapes are defined:\n\ 139 --- DELETED --- 140 %s version %s. Type ? for help.\n 141 \nInterrupt\n -142 %s contains invalid @@ sequence\n -143 %s contains invalid character '%s'\n +142 "%s" contains invalid %s sequence\n +143 "%s" contains invalid character %s\n 144 --- DELETED --- 145 detract asked to insert commas\n 146 Creation of temporary image @@ -334,3 +334,4 @@ The following ~ escapes are defined:\n\ 281 Message piping to <%s> failed\n 282 Message writing to <%s> failed: %s\n 283 write error +284 Invalid domain name: "%s", character %s\n diff --git a/def.h b/def.h index c782700a..9bd34914 100644 --- a/def.h +++ b/def.h @@ -442,25 +442,35 @@ struct header { */ enum nameflags { - NAME_NAME_SALLOC = 1<<0, /* .n_name is doped */ - NAME_FULLNAME_SALLOC = 1<<1, /* .n_fullname is doped */ - NAME_SKINNED = 1<<2, /* Is actually skin()ned */ - NAME_ADDRSPEC_CHECKED = 1<<3, - NAME_ADDRSPEC_ISFILE = 1<<4, /* is a file path */ - NAME_ADDRSPEC_ISPIPE = 1<<5, /* is a command for pipeing */ + NAME_NAME_SALLOC = 1<< 0, /* .n_name is doped */ + NAME_FULLNAME_SALLOC = 1<< 1, /* .n_fullname is doped */ + NAME_SKINNED = 1<< 2, /* Is actually skin()ned */ + NAME_IDNA = 1<< 3, /* IDNA was applied */ + NAME_ADDRSPEC_CHECKED = 1<< 4, /* Address has been .. 
and */ + NAME_ADDRSPEC_ISFILE = 1<< 5, /* ..is a file path */ + NAME_ADDRSPEC_ISPIPE = 1<< 6, /* ..is a command for piping */ NAME_ADDRSPEC_ISFILEORPIPE = NAME_ADDRSPEC_ISFILE | NAME_ADDRSPEC_ISPIPE, - NAME_ADDRSPEC_INVALID = 1<<6, /* An invalid addr-spec */ - NAME_ADDRSPEC_ERR_EMPTY = 1<<7, /* An empty string (or NULL) */ - NAME_ADDRSPEC_ERR_ATSEQ = 1<<8, /* Weird @ sequence */ - /* More on _ERR_ below */ - NAME_IDNA = 1<<9, /* IDNA convertion needed/applied */ - /* Bit range for storing a faulty character */ - _NAME_ADDRSPEC_ERR_MASKC = 0xFF << 16 -}; - -#define NAME_ADDRSPEC_ERR_GETC(F) (((F) & 0x00FF0000) >> 16) -#define NAME_ADDRSPEC_ERR_SETC(C) (((unsigned char)(C) & 0xFF) << 16) + NAME_ADDRSPEC_ERR_EMPTY = 1<< 7, /* An empty string (or NULL) */ + NAME_ADDRSPEC_ERR_ATSEQ = 1<< 8, /* Weird @ sequence */ + NAME_ADDRSPEC_ERR_CHAR = 1<< 9, /* Invalid character */ + NAME_ADDRSPEC_ERR_IDNA = 1<<10, /* IDNA conversion failed */ + NAME_ADDRSPEC_INVALID = NAME_ADDRSPEC_ERR_EMPTY | + NAME_ADDRSPEC_ERR_ATSEQ | + NAME_ADDRSPEC_ERR_CHAR | + NAME_ADDRSPEC_ERR_IDNA, + _NAME_SHIFTWC = 11, + _NAME_MAXWC = 0xFFFFF, + _NAME_MASKWC = _NAME_MAXWC << _NAME_SHIFTWC +}; + +/* In the !_ERR_EMPTY case, the failing character can be queried */ +#define NAME_ADDRSPEC_ERR_GETWC(F) \ + ((((unsigned int)(F) & _NAME_MASKWC) >> _NAME_SHIFTWC) & _NAME_MAXWC) +#define NAME_ADDRSPEC_ERR_SET(F, E, WC) \ +do (F) = ((F) & ~(NAME_ADDRSPEC_INVALID | _NAME_MASKWC)) | \ + (E) | (((unsigned int)(WC) & _NAME_MAXWC) << _NAME_SHIFTWC); \ +while (0) struct name { struct name *n_flink; /* Forward link in list. */ @@ -472,14 +482,13 @@ struct name { }; struct addrguts { - const char *ag_input; /* Input string as given */ + char const *ag_input; /* Input string as given */ size_t ag_ilen; /* strlen() of input */ size_t ag_iaddr_start; /* Start of address in .ag_input, */ - size_t ag_iaddr_end; /* its end (only if ! 
_FILEADDR) */ + size_t ag_iaddr_end; /* its end (addresses only) */ char *ag_skinned; /* Output (alloced if !=.ag_input) */ size_t ag_slen; /* strlen() of .ag_skinned */ size_t ag_sdom_start; /* Start of domain in .ag_skinned, */ - size_t ag_sdom_end; /* its end */ enum nameflags ag_n_flags; /* enum nameflags of .ag_skinned */ }; diff --git a/extern.h b/extern.h index 0caddc5b..47ac48bc 100644 --- a/extern.h +++ b/extern.h @@ -290,7 +290,7 @@ char *routeaddr(const char *name); #define is_fileorpipe_addr(NP) \ (((NP)->n_flags & NAME_ADDRSPEC_ISFILEORPIPE) != 0) int is_addr_invalid(struct name *np, int putmsg); -char *skinned_name(struct name *np); +char *skinned_name(struct name const*np); char *skin(char *name); int addrspec_with_guts(int doskin, char const *name, struct addrguts *agp); char *realname(char *name); diff --git a/head.c b/head.c index e13ec890..a52767b0 100644 --- a/head.c +++ b/head.c @@ -39,8 +39,15 @@ #include "rcv.h" #include "extern.h" + #include +#ifdef USE_IDNA +# include <idna.h> +# include <stringprep.h> +# include <tld.h> +#endif + /* * Mail -- a mail program * @@ -51,6 +58,9 @@ static char * copyin(char *src, char **space); static char * nextword(char *wp, char *wbuf); static int gethfield(FILE *f, char **linebuf, size_t *linesize, int rem, char **colon); +#ifdef USE_IDNA +static struct addrguts * idna_apply(struct addrguts *agp); +#endif static int addrspec_check(int doskin, struct addrguts *agp); static int msgidnextc(const char **cp, int *status); static int charcount(char *str, int c); @@ -590,27 +600,32 @@ routeaddr(const char *name) int is_addr_invalid(struct name *np, int putmsg) { - char *name = np->n_name; - int f = np->n_flags; + char cbuf[sizeof "'\\U12340'"], *name = np->n_name; + int f = np->n_flags, ok8bit = 1; + unsigned int c; + char const *fmt = "'\\x%02X'", *cs; if ((f & NAME_ADDRSPEC_INVALID) == 0 || ! 
putmsg || (f & NAME_ADDRSPEC_ERR_EMPTY) != 0) - ; + goto jleave; + + if (f & NAME_ADDRSPEC_ERR_IDNA) + cs = tr(284, "Invalid domain name: \"%s\", character %s\n"), + fmt = "'\\U%04X'", + ok8bit = 0; else if (f & NAME_ADDRSPEC_ERR_ATSEQ) - fprintf(stderr, tr(142, "%s contains invalid @@ sequence\n"), - name); - else { - char ce[sizeof(1ul)]; - unsigned char c = NAME_ADDRSPEC_ERR_GETC(f); - - if ((unsigned char)c >= 040 && (unsigned char)c <= 0177) - ce[0] = c, ce[1] = '\0'; - else - snprintf(ce, sizeof(ce), "\\%03o", (unsigned int)c); - fprintf(stderr, tr(143, - "%s contains invalid character '%s'\n"), - name, ce); - } + cs = tr(142, "\"%s\" contains invalid %s sequence\n"); + else + cs = tr(143, "\"%s\" contains invalid character %s\n"); + + c = NAME_ADDRSPEC_ERR_GETWC(f); + if (ok8bit && c >= 040 && c <= 0177) + snprintf(cbuf, sizeof cbuf, "'%c'", c); + else + snprintf(cbuf, sizeof cbuf, fmt, c); + + fprintf(stderr, cs, name, cbuf); +jleave: return ((f & NAME_ADDRSPEC_INVALID) != 0); } @@ -619,7 +634,7 @@ is_addr_invalid(struct name *np, int putmsg) * Note well that it may *not* create a duplicate. */ char * -skinned_name(struct name *np) /* TODO !HAVE_ASSERTS legacy */ +skinned_name(struct name const*np) /* TODO !HAVE_ASSERTS legacy */ { #ifdef HAVE_ASSERTS assert(np->n_flags & NAME_SKINNED); @@ -649,6 +664,93 @@ skin(char *name) } /* + * Convert the domain part of a skinned address to IDNA. + * If an error occurs before Unicode information is available, revert the IDNA + * error to a normal CHAR one so that the error message doesn't talk Unicode. + */ +#ifdef USE_IDNA +static struct addrguts * +idna_apply(struct addrguts *agp) +{ + char *idna_utf8, *idna_ascii, *cs; + uint32_t *idna_uni; + size_t sz, i; + int strict = (value("idna-strict-checks") != NULL); + + sz = agp->ag_slen - agp->ag_sdom_start; + assert(sz > 0); + idna_utf8 = ac_alloc(sz + 1); + memcpy(idna_utf8, agp->ag_skinned + agp->ag_sdom_start, sz); + idna_utf8[sz] = '\0'; + + if (! 
utf8) { + char *tmp = stringprep_locale_to_utf8(idna_utf8); + ac_free(idna_utf8); + idna_utf8 = tmp; + if (idna_utf8 == NULL) { + agp->ag_n_flags ^= NAME_ADDRSPEC_ERR_IDNA | + NAME_ADDRSPEC_ERR_CHAR; + goto jleave; + } + } + + if (idna_to_ascii_8z(idna_utf8, &idna_ascii, + strict ? IDNA_USE_STD3_ASCII_RULES : 0) + != IDNA_SUCCESS) { + agp->ag_n_flags ^= NAME_ADDRSPEC_ERR_IDNA | + NAME_ADDRSPEC_ERR_CHAR; + goto jleave1; + } + + idna_uni = NULL; + if (! strict) + goto jset; + + /* + * Due to normalization that may have occurred we must convert back to + * be able to check for top level domain issues + */ + if (idna_to_unicode_8z4z(idna_ascii, &idna_uni, 0) != IDNA_SUCCESS) { + agp->ag_n_flags ^= NAME_ADDRSPEC_ERR_IDNA | + NAME_ADDRSPEC_ERR_CHAR; + goto jleave2; + } + + i = (size_t)tld_check_4z(idna_uni, &sz, NULL); + if (i != TLD_SUCCESS) /* read idna_uni[sz] before the buffer is freed */ + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, NAME_ADDRSPEC_ERR_IDNA, + idna_uni[sz]); + free(idna_uni); + if (i != TLD_SUCCESS) + goto jleave2; + +jset: /* Replace the domain part of .ag_skinned with IDNA version */ + sz = strlen(idna_ascii); + i = agp->ag_sdom_start; + cs = salloc(i + sz + 1); /* kept prefix (i) + converted domain (sz) + NUL */ + memcpy(cs, agp->ag_skinned, i); + memcpy(cs + i, idna_ascii, sz); + i += sz; + cs[i] = '\0'; + + agp->ag_skinned = cs; + agp->ag_slen = i; + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, + NAME_NAME_SALLOC|NAME_SKINNED|NAME_IDNA, 0); + +jleave2: + free(idna_ascii); +jleave1: + if (utf8) + ac_free(idna_utf8); + else + free(idna_utf8); +jleave: + return (agp); +} +#endif + +/* * Classify and check a (possibly skinned) header body according to RFC * *addr-spec* rules; if it (is assumed to has been) skinned it may however be * also a file or a pipe command, so check that first, then. 
@@ -659,18 +761,21 @@ addrspec_check(int skinned, struct addrguts *agp) { char *addr, *p, in_quote, in_domain, hadat; union {char c; unsigned char u;} c; +#ifdef USE_IDNA + char use_idna = (value("idna-disable") == NULL); +#endif agp->ag_n_flags |= NAME_ADDRSPEC_CHECKED; addr = agp->ag_skinned; if (agp->ag_iaddr_end <= agp->ag_iaddr_start) { - agp->ag_n_flags |= NAME_ADDRSPEC_INVALID | - NAME_ADDRSPEC_ERR_EMPTY; + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, NAME_ADDRSPEC_ERR_EMPTY, + 0); goto jleave; } /* If the field is not a recipient, it cannot be a file or a pipe */ - if (! skinned) /* XXX || (gfield & (GTO | GCC | GBCC)) == 0) */ + if (! skinned) goto jaddr_check; /* @@ -709,11 +814,15 @@ jaddr_check: for (p = addr; (c.c = *p++) != '\0';) { if (c.c == '"') { in_quote = ! in_quote; - } else if (c.u < 040 || c.u >= 0177) { /*FIXME IDNA!!in_domin */ - /* - if (in_domain) - agp->ag_n_flags |= NAME_ADDRSPEC_IDNA; - else*/ + } else if (c.u < 040 || c.u >= 0177) { +#ifdef USE_IDNA + if (in_domain && use_idna) { + if (use_idna == 1) + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, + NAME_ADDRSPEC_ERR_IDNA, c.u); + use_idna = 2; + } else +#endif break; } else if (in_domain == 2) { if ((c.c == ']' && *p != '\0') || c.c == '\\' || @@ -725,9 +834,8 @@ jaddr_check: ++p; } else if (c.c == '@') { if (hadat++) { - agp->ag_n_flags |= NAME_ADDRSPEC_INVALID | - NAME_ADDRSPEC_ERR_ATSEQ | - NAME_ADDRSPEC_ERR_SETC('@'); + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, + NAME_ADDRSPEC_ERR_ATSEQ, c.u); goto jleave; } agp->ag_sdom_start = (size_t)(p - addr); @@ -741,25 +849,22 @@ jaddr_check: hadat = 0; } - if (c.c == '\0') { - agp->ag_sdom_end = (size_t)(--p - addr); - } else - agp->ag_n_flags |= NAME_ADDRSPEC_INVALID | - NAME_ADDRSPEC_ERR_SETC(c.c); + if (c.c != '\0') { + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, NAME_ADDRSPEC_ERR_CHAR, + c.u); + goto jleave; + } + +#ifdef USE_IDNA + if (use_idna == 2) + agp = idna_apply(agp); +#endif + jleave: return ((agp->ag_n_flags & NAME_ADDRSPEC_INVALID) != 0); } /* 
- * TODO addrspec_with_guts(!DOSKIN): 'want to release v13, but the code is evil - * TODO in that {,GSKIN,GFULL} are not really enough to handle all names. - * TODO We will have to classify *exactly* those fields we really care about, - * TODO and simply perform high-bit-set checking only (?) for all the others. - * TODO For those we do care for, provide special parsers that classify and - * TODO extract the stuff *exactly* (after a short glance i think NetBSD mailx - * TODO does this). And *do* see namecache and header object TODO notes. - */ -/* * Skin *name* and extract the *addr-spec* according to RFC 5322. TODO 822:5322 * Store the result in .ag_skinned and also fill in those .ag_ fields that have * actually been seen. @@ -769,17 +874,19 @@ jleave: int addrspec_with_guts(int doskin, char const *name, struct addrguts *agp) { - char *cp, *cp2, *bufend, *nbuf, c; + char const *cp; + char *cp2, *bufend, *nbuf, c; int gotlt, lastsp; memset(agp, 0, sizeof *agp); if ((agp->ag_input = name) == NULL || /* XXX ever? */ (agp->ag_ilen = strlen(name)) == 0) { - agp->ag_n_flags |= NAME_ADDRSPEC_CHECKED | - NAME_ADDRSPEC_INVALID | NAME_ADDRSPEC_ERR_EMPTY; agp->ag_skinned = ""; /* NAME_SALLOC not set */ agp->ag_slen = 0; + agp->ag_n_flags |= NAME_ADDRSPEC_CHECKED; + NAME_ADDRSPEC_ERR_SET(agp->ag_n_flags, NAME_ADDRSPEC_ERR_EMPTY, + 0); return (1); } diff --git a/names.c b/names.c index c58cffb0..90d01684 100644 --- a/names.c +++ b/names.c @@ -65,9 +65,8 @@ static struct name * put(struct name *list, struct name *node); static struct name * delname(struct name *np, char *name); /* - * Allocate a single element of a name list, - * initialize its name field to the passed - * name and return it. + * Allocate a single element of a name list, initialize its name field to the + * passed name and return it. 
*/ struct name * nalloc(char *str, enum gfield ntype) @@ -85,20 +84,53 @@ nalloc(char *str, enum gfield ntype) (void)addrspec_with_guts((ntype & (GFULL|GSKIN|GREF)) != 0, str, &ag); if ((ag.ag_n_flags & NAME_NAME_SALLOC) == 0) { ag.ag_n_flags |= NAME_NAME_SALLOC; - ag.ag_skinned = savestr(ag.ag_skinned); + ag.ag_skinned = savestrbuf(ag.ag_skinned, ag.ag_slen); } np->n_fullname = np->n_name = ag.ag_skinned; np->n_flags = ag.ag_n_flags; if (ntype & GFULL) { - if (ag.ag_ilen != ag.ag_slen) { + if (ag.ag_ilen == ag.ag_slen +#ifdef USE_IDNA + && (ag.ag_n_flags & NAME_IDNA) == 0 +#endif + ) + goto jleave; + if (ag.ag_n_flags & NAME_ADDRSPEC_ISFILEORPIPE) + goto jleave; +#ifdef USE_IDNA + if ((ag.ag_n_flags & NAME_IDNA) == 0) { +#endif in.s = str; in.l = ag.ag_ilen; - mime_fromhdr(&in, &out, TD_ISPR|TD_ICONV); - np->n_fullname = savestr(out.s); - free(out.s); - np->n_flags |= NAME_FULLNAME_SALLOC; +#ifdef USE_IDNA + } else { + /* + * The domain name was IDNA and has been converted. + * We also have to ensure that the domain name in + * .n_fullname is replaced with the converted version, + * since MIME doesn't perform encoding of addresses. + */ + size_t l = ag.ag_iaddr_start, + lsuff = ag.ag_ilen - ag.ag_iaddr_end; + in.s = ac_alloc(l + ag.ag_slen + lsuff + 1); + memcpy(in.s, str, l); + memcpy(in.s + l, ag.ag_skinned, ag.ag_slen); + l += ag.ag_slen; + memcpy(in.s + l, str + ag.ag_iaddr_end, lsuff); + l += lsuff; + in.s[l] = '\0'; + in.l = l; } +#endif + mime_fromhdr(&in, &out, TD_ISPR|TD_ICONV); + np->n_fullname = savestr(out.s); + free(out.s); +#ifdef USE_IDNA + if (ag.ag_n_flags & NAME_IDNA) + ac_free(in.s); +#endif + np->n_flags |= NAME_FULLNAME_SALLOC; } else if (ntype & GREF) { /* TODO LEGACY */ /* TODO Unfortunately we had to skin GREFerences i.e. the * TODO surrounding angle brackets have been stripped away. @@ -112,6 +144,7 @@ nalloc(char *str, enum gfield ntype) *(str++) = '>'; *str = '\0'; } +jleave: return (np); } -- 2.11.4.GIT