mapi/rfc1522.c

   1 /*
   2  * ========================================================================
   3  * Copyright 2006 University of Washington
   4  *
   5  * Licensed under the Apache License, Version 2.0 (the "License");
   6  * you may not use this file except in compliance with the License.
   7  * You may obtain a copy of the License at
   8  *
   9  *     http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * ========================================================================
  12  */
  13
/*
 *  rfc1522.c
 *
 *  Right now this is just rfc1522_encode (taken straight out of
 *  pine/strings.c), but if it were to become necessary it could be made
 *  to do rfc1522_decode too.  It also already has some string functions.
 */
  21 #include "pmapi.h"
  22
  23 #define RFC1522_INIT    "=?"
  24 #define RFC1522_INIT_L  2
  25 #define RFC1522_TERM    "?="
  26 #define RFC1522_TERM_L  2
  27 #define RFC1522_DLIM    "?"
  28 #define RFC1522_DLIM_L  1
  29 #define RFC1522_MAXW    75
  30 #define ESPECIALS       "()<>@,;:\"/[]?.="
  31 #define RFC1522_OVERHEAD(S)     (RFC1522_INIT_L + RFC1522_TERM_L +      \
  32                                  (2 * RFC1522_DLIM_L) + strlen(S) + 1);
  33 #define RFC1522_ENC_CHAR(C)     (((C) & 0x80) || !rfc1522_valtok(C)     \
  34                                  || (C) == '_' )
  35 #define SPACE           ' '             /* space character      */
  36 #define ESCAPE          '\033'          /* the escape           */
  37 #define UNKNOWN_CHARSET         "X-UNKNOWN"
  38
  39 /*
  40  * Hex conversion aids
  41  */
  42 #define HEX_ARRAY       "0123456789ABCDEF"
  43 #define HEX_CHAR1(C)    HEX_ARRAY[((C) & 0xf0) >> 4]
  44 #define HEX_CHAR2(C)    HEX_ARRAY[(C) & 0xf]
  45
  46 #define C2XPAIR(C, S)   { \
  47                             *(S)++ = HEX_CHAR1(C); \
  48                             *(S)++ = HEX_CHAR2(C); \
  49                         }
  50
  51
  52 int            rfc1522_token PROTO((char *, int (*) PROTO((int)), char *,
  53                                     char **));
  54 int            rfc1522_valtok PROTO((int));
  55 int            rfc1522_valenc PROTO((int));
  56 int            rfc1522_valid PROTO((char *, char **, char **, char **,
  57                                     char **));
  58 char          *rfc1522_8bit PROTO((void *, int));
  59 char          *rfc1522_binary PROTO((void *, int));
  60 unsigned char *rfc1522_encoded_word PROTO((unsigned char *, int, char *));
  61 char          *strindex PROTO((char *, int));
  62 void        sstrcpy PROTO((char **, char *));
  63 void        sstrncpy PROTO((char **, char *, int));
  64
  65 int         removing_double_quotes PROTO((char *));
  66
  67 static char *known_escapes[] = {
  68     "(B",  "(J",  "$@",  "$B",                  /* RFC 1468 */
  69     "(H",
  70     NULL};
  71 /* different for non-Windows */
  72
  73 int
  74 match_escapes(esc_seq)
  75     char *esc_seq;
  76 {
  77     char **p;
  78     int    n;
  79
  80     for(p = known_escapes; *p && strncmp(esc_seq, *p, n = strlen(*p)); p++)
  81       ;
  82
  83     return(*p ? n + 1 : 0);
  84 }
  85
  86 /*----------------------------------------------------------------------
  87     A replacement for strchr or index ...
  88
  89     Returns a pointer to the first occurrence of the character
  90     'ch' in the specified string or NULL if it doesn't occur
  91
  92  ....so we don't have to worry if it's there or not. We bring our own.
  93 If we really care about efficiency and think the local one is more
  94 efficient the local one can be used, but most of the things that take
  95 a long time are in the c-client and not in pine.
  96  ----*/
  97 char *
  98 strindex(buffer, ch)
  99     char *buffer;
 100     int ch;
 101 {
 102     do
 103       if(*buffer == ch)
 104         return(buffer);
 105     while (*buffer++ != '\0');
 106
 107     return(NULL);
 108 }
 109
 110 /*----------------------------------------------------------------------
 111   copy the source string onto the destination string returning with
 112   the destination string pointer at the end of the destination text
 113
 114   motivation for this is to avoid twice passing over a string that's
 115   being appended to twice (i.e., strcpy(t, x); t += strlen(t))
 116  ----*/
 117 void
 118 sstrcpy(d, s)
 119     char **d;
 120     char *s;
 121 {
 122     while((**d = *s++) != '\0')
 123       (*d)++;
 124 }
 125
 126 void
 127 sstrncpy(d, s, n)
 128     char **d;
 129     char *s;
 130     int n;
 131 {
 132     while(n-- > 0 && (**d = *s++) != '\0')
 133       (*d)++;
 134 }
 135
 136 /*
 137  * rfc1522_token - scan the given source line up to the end_str making
 138  *                 sure all subsequent chars are "valid" leaving endp
 139  *                 a the start of the end_str.
 140  * Returns: TRUE if we got a valid token, FALSE otherwise
 141  */
 142 int
 143 rfc1522_token(s, valid, end_str, endp)
 144     char  *s;
 145     int  (*valid) PROTO((int));
 146     char  *end_str;
 147     char **endp;
 148 {
 149     while(*s){
 150         if((char) *s == *end_str                /* test for matching end_str */
 151            && ((end_str[1])
 152                 ? !strncmp((char *)s + 1, end_str + 1, strlen(end_str + 1))
 153                 : 1)){
 154             *endp = s;
 155             return(TRUE);
 156         }
 157
 158         if(!(*valid)(*s++))                     /* test for valid char */
 159           break;
 160     }
 161
 162     return(FALSE);
 163 }
 164
 165
 166 /*
 167  * rfc1522_valtok - test for valid character in the RFC 1522 encoded
 168  *                  word's charset and encoding fields.
 169  */
 170 int
 171 rfc1522_valtok(c)
 172     int c;
 173 {
 174     return(!(c == SPACE || iscntrl(c & 0x7f) || strindex(ESPECIALS, c)));
 175 }
 176
 177
 178 /*
 179  * rfc1522_valenc - test for valid character in the RFC 1522 encoded
 180  *                  word's encoded-text field.
 181  */
 182 int
 183 rfc1522_valenc(c)
 184     int c;
 185 {
 186     return(!(c == '?' || c == SPACE) && isprint((unsigned char)c));
 187 }
 188
 189
 190 /*
 191  * rfc1522_valid - validate the given string as to it's rfc1522-ness
 192  */
 193 int
 194 rfc1522_valid(s, charset, enc, txt, endp)
 195     char  *s;
 196     char **charset;
 197     char **enc;
 198     char **txt;
 199     char **endp;
 200 {
 201     char *c, *e, *t, *p;
 202     int   rv;
 203
 204     rv = rfc1522_token(c = s+RFC1522_INIT_L, rfc1522_valtok, RFC1522_DLIM, &e)
 205            && rfc1522_token(++e, rfc1522_valtok, RFC1522_DLIM, &t)
 206            && rfc1522_token(++t, rfc1522_valenc, RFC1522_TERM, &p)
 207            && p - s <= RFC1522_MAXW;
 208
 209     if(charset)
 210       *charset = c;
 211
 212     if(enc)
 213       *enc = e;
 214
 215     if(txt)
 216       *txt = t;
 217
 218     if(endp)
 219       *endp = p;
 220
 221     return(rv);
 222 }
 223
 224
 225 /*
 226  * rfc1522_encode - encode the given source string ala RFC 1522,
 227  *                  IF NECESSARY, into the given destination buffer.
 228  *                  Don't bother copying if it turns out encoding
 229  *                  isn't necessary.
 230  *
 231  * Returns: pointer to either the destination buffer containing the
 232  *          encoded text, or a pointer to the source buffer if we didn't
 233  *          have to encode anything.
 234  */
 235 char *
 236 rfc1522_encode(d, len, s, charset)
 237     char          *d;
 238     size_t         len;         /* length of d */
 239     unsigned char *s;
 240     char          *charset;
 241 {
 242     unsigned char *p, *q;
 243     int            n;
 244
 245     if(!s)
 246       return((char *) s);
 247
 248     if(!charset)
 249       charset = UNKNOWN_CHARSET;
 250
 251     /* look for a reason to encode */
 252     for(p = s, n = 0; *p; p++)
 253       if((*p) & 0x80){
 254           n++;
 255       }
 256       else if(*p == RFC1522_INIT[0]
 257               && !strncmp((char *) p, RFC1522_INIT, RFC1522_INIT_L)){
 258           if(rfc1522_valid((char *) p, NULL, NULL, NULL, (char **) &q))
 259             p = q + RFC1522_TERM_L - 1;         /* advance past encoded gunk */
 260       }
 261       else if(*p == ESCAPE && match_escapes((char *)(p+1))){
 262           n++;
 263       }
 264
 265     if(n){                                      /* found, encoding to do */
 266         char *rv  = d, *t,
 267               enc = (n > (2 * (p - s)) / 3) ? 'B' : 'Q';
 268
 269         while(*s){
 270             if(d-rv < len-1-(RFC1522_INIT_L+2*RFC1522_DLIM_L+1)){
 271                 sstrcpy(&d, RFC1522_INIT);      /* insert intro header, */
 272                 sstrcpy(&d, charset);           /* character set tag, */
 273                 sstrcpy(&d, RFC1522_DLIM);      /* and encoding flavor */
 274                 *d++ = enc;
 275                 sstrcpy(&d, RFC1522_DLIM);
 276             }
 277
 278             /*
 279              * feed lines to encoder such that they're guaranteed
 280              * less than RFC1522_MAXW.
 281              */
 282             p = rfc1522_encoded_word(s, enc, charset);
 283             if(enc == 'B')                      /* insert encoded data */
 284               sstrncpy(&d, t = rfc1522_binary(s, p - s), len-1-(d-rv));
 285             else                                /* 'Q' encoding */
 286               sstrncpy(&d, t = rfc1522_8bit(s, p - s), len-1-(d-rv));
 287
 288             sstrncpy(&d, RFC1522_TERM, len-1-(d-rv));   /* insert terminator */
 289             fs_give((void **) &t);
 290             if(*p)                              /* more src string follows */
 291               sstrncpy(&d, "\015\012 ", len-1-(d-rv));  /* insert cont. line */
 292
 293             s = p;                              /* advance s */
 294         }
 295
 296         rv[len-1] = '\0';
 297         return(rv);
 298     }
 299     else
 300       return((char *) s);                       /* no work for us here */
 301 }
 302
 303
 304
 305 /*
 306  * rfc1522_encoded_word -- cut given string into max length encoded word
 307  *
 308  * Return: pointer into 's' such that the encoded 's' is no greater
 309  *         than RFC1522_MAXW
 310  *
 311  *  NOTE: this line break code is NOT cognizant of any SI/SO
 312  *  charset requirements nor similar strategies using escape
 313  *  codes.  Hopefully this will matter little and such
 314  *  representation strategies don't also include 8bit chars.
 315  */
 316 unsigned char *
 317 rfc1522_encoded_word(s, enc, charset)
 318     unsigned char *s;
 319     int            enc;
 320     char          *charset;
 321 {
 322     int goal = RFC1522_MAXW - RFC1522_OVERHEAD(charset);
 323
 324     if(enc == 'B')                      /* base64 encode */
 325       for(goal = ((goal / 4) * 3) - 2; goal && *s; goal--, s++)
 326         ;
 327     else                                /* special 'Q' encoding */
 328       for(; goal && *s; s++)
 329         if((goal -= RFC1522_ENC_CHAR(*s) ? 3 : 1) < 0)
 330           break;
 331
 332     return(s);
 333 }
 334
 335
 336
 337 /*
 338  * rfc1522_8bit -- apply RFC 1522 'Q' encoding to the given 8bit buffer
 339  *
 340  * Return: alloc'd buffer containing encoded string
 341  */
 342 char *
 343 rfc1522_8bit(src, slen)
 344     void *src;
 345     int   slen;
 346 {
 347     char *ret = (char *) fs_get ((size_t) (3*slen + 2));
 348     char *d = ret;
 349     unsigned char c;
 350     unsigned char *s = (unsigned char *) src;
 351
 352     while (slen--) {                            /* for each character */
 353         if (((c = *s++) == '\015') && (*s == '\012') && slen) {
 354             *d++ = '\015';                      /* true line break */
 355             *d++ = *s++;
 356             slen--;
 357         }
 358         else if(c == SPACE){                    /* special encoding case */
 359             *d++ = '_';
 360         }
 361         else if(RFC1522_ENC_CHAR(c)){
 362             *d++ = '=';                         /* quote character */
 363             C2XPAIR(c, d);
 364         }
 365         else
 366           *d++ = (char) c;                      /* ordinary character */
 367     }
 368
 369     *d = '\0';                                  /* tie off destination */
 370     return(ret);
 371 }
 372
 373
 374 /*
 375  * rfc1522_binary -- apply RFC 1522 'B' encoding to the given 8bit buffer
 376  *
 377  * Return: alloc'd buffer containing encoded string
 378  */
 379 char *
 380 rfc1522_binary (src, srcl)
 381     void *src;
 382     int   srcl;
 383 {
 384     static char *v =
 385             "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 386     unsigned char *s = (unsigned char *) src;
 387     char *ret, *d;
 388
 389     d = ret = (char *) fs_get ((size_t) ((((srcl + 2) / 3) * 4) + 1));
 390     for (; srcl; s += 3) {      /* process tuplets */
 391                                 /* byte 1: high 6 bits (1) */
 392         *d++ = v[s[0] >> 2];
 393                                 /* byte 2: low 2 bits (1), high 4 bits (2) */
 394         *d++ = v[((s[0] << 4) + (--srcl ? (s[1] >> 4) : 0)) & 0x3f];
 395                                 /* byte 3: low 4 bits (2), high 2 bits (3) */
 396         *d++ = srcl ? v[((s[1] << 2) + (--srcl ? (s[2] >> 6) :0)) & 0x3f] :'=';
 397                                 /* byte 4: low 6 bits (3) */
 398         *d++ = srcl ? v[s[2] & 0x3f] : '=';
 399         if(srcl)
 400           srcl--;               /* count third character if processed */
 401     }
 402
 403     *d = '\0';                  /* tie off string */
 404     return(ret);                /* return the resulting string */
 405 }
 406
 407
 408 /*
 409  *  Function to parse the given string into two space-delimited fields
 410  *  Quotes may be used to surround labels or values with spaces in them.
 411  *  Backslash negates the special meaning of a quote.
 412  *  Unescaping of backslashes only happens if the pair member is quoted,
 413  *    this provides for backwards compatibility.
 414  *
 415  * Args -- string -- the source string
 416  *          label -- the first half of the string, a return value
 417  *          value -- the last half of the string, a return value
 418  *        firstws -- if set, the halves are delimited by the first unquoted
 419  *                    whitespace, else by the last unquoted whitespace
 420  *   strip_internal_label_quotes -- unescaped quotes in the middle of the label
 421  *                                   are removed. This is useful for vars
 422  *                                   like display-filters and url-viewers
 423  *                                   which may require quoting of an arg
 424  *                                   inside of a _TOKEN_.
 425  */
 426 void
 427 get_pair(string, label, value, firstws, strip_internal_label_quotes)
 428     char *string, **label, **value;
 429     int   firstws;
 430     int   strip_internal_label_quotes;
 431 {
 432     char *p, *q, *tmp, *token = NULL;
 433     int   quoted = 0;
 434
 435     *label = *value = NULL;
 436
 437     /*
 438      * This for loop just finds the beginning of the value. If firstws
 439      * is set, then it begins after the first whitespace. Otherwise, it begins
 440      * after the last whitespace. Quoted whitespace doesn't count as
 441      * whitespace. If there is no unquoted whitespace, then there is no
 442      * label, there's just a value.
 443      */
 444     for(p = string; p && *p;){
 445         if(*p == '"')                           /* quoted label? */
 446           quoted = (quoted) ? 0 : 1;
 447
 448         if(*p == '\\' && *(p+1) == '"')         /* escaped quote? */
 449           p++;                                  /* skip it... */
 450
 451         if(isspace((unsigned char)*p) && !quoted){      /* if space,  */
 452             while(*++p && isspace((unsigned char)*p))   /* move past it */
 453               ;
 454
 455             if(!firstws || !token)
 456               token = p;                        /* remember start of text */
 457         }
 458         else
 459           p++;
 460     }
 461
 462     if(token){                                  /* copy label */
 463         *label = p = (char *)fs_get(((token - string) + 1) * sizeof(char));
 464
 465         /* make a copy of the string */
 466         tmp = (char *)fs_get(((token - string) + 1) * sizeof(char));
 467         strncpy(tmp, string, token - string);
 468         tmp[token-string] = '\0';
 469
 470         removing_leading_and_trailing_white_space(tmp);
 471         quoted = removing_double_quotes(tmp);
 472
 473         for(q = tmp; *q; q++){
 474             if(quoted && *q == '\\' && (*(q+1) == '"' || *(q+1) == '\\'))
 475               *p++ = *++q;
 476             else if(!(strip_internal_label_quotes && *q == '"'))
 477               *p++ = *q;
 478         }
 479
 480         *p = '\0';                              /* tie off label */
 481         fs_give((void **)&tmp);
 482         if(*label == '\0')
 483           fs_give((void **)label);
 484     }
 485     else
 486       token = string;
 487
 488     if(token){                                  /* copy value */
 489         *value = p = (char *)fs_get((strlen(token) + 1) * sizeof(char));
 490
 491         tmp = cpystr(token);
 492         removing_leading_and_trailing_white_space(tmp);
 493         quoted = removing_double_quotes(tmp);
 494
 495         for(q = tmp; *q ; q++){
 496             if(quoted && *q == '\\' && (*(q+1) == '"' || *(q+1) == '\\'))
 497               *p++ = *++q;
 498             else
 499               *p++ = *q;
 500         }
 501
 502         *p = '\0';                              /* tie off value */
 503         fs_give((void **)&tmp);
 504     }
 505 }
 506
 507 void
 508 removing_leading_and_trailing_white_space(string)
 509      char *string;
 510 {
 511     register char *p, *q = NULL;
 512
 513     if(!string)
 514       return;
 515
 516     for(p = string; *p; p++)            /* find the first non-blank  */
 517       if(!isspace((unsigned char)*p)){
 518           while(*string = *p++){        /* copy back from there... */
 519               q = (!isspace((unsigned char)*string)) ? NULL : (!q) ? string : q;
 520               string++;
 521           }
 522
 523           if(q)
 524             *q = '\0';
 525
 526           return;
 527       }
 528
 529     if(*string != '\0')
 530       *string = '\0';
 531 }
 532
 533 /*----------------------------------------------------------------------
 534        Remove one set of double quotes surrounding string in place
 535        Returns 1 if quotes were removed
 536
 537   Args: string -- string to remove quotes from
 538   ----*/
 539 int
 540 removing_double_quotes(string)
 541      char *string;
 542 {
 543     register char *p;
 544     int ret = 0;
 545
 546     if(string && string[0] == '"' && string[1] != '\0'){
 547         p = string + strlen(string) - 1;
 548         if(*p == '"'){
 549             ret++;
 550             *p = '\0';
 551             for(p = string; *p; p++)
 552               *p = *(p+1);
 553         }
 554     }
 555
 556     return(ret);
 557 }