* New version 2.19.12
[alpine.git] / mapi / rfc1522.c
blobf77a8c2731046fd3e7d43c462c65478b9efd11a0
/*
 * ========================================================================
 * Copyright 2006 University of Washington
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * ========================================================================
 */

/*
 * rfc1522.c
 *
 * Right now this is just rfc1522_encode (taken straight out of
 * pine/strings.c), but if it were to become necessary it could be made
 * to do rfc1522_decode too.  It also already has some string functions.
 */
21 #include "pmapi.h"
/*
 * Pieces of an RFC 1522 encoded-word as emitted by rfc1522_encode:
 *
 *     RFC1522_INIT charset RFC1522_DLIM encoding RFC1522_DLIM text RFC1522_TERM
 *
 * The *_L macros are the lengths of the corresponding string literals.
 */
#define RFC1522_INIT    "=?"
#define RFC1522_INIT_L  2
#define RFC1522_TERM    "?="
#define RFC1522_TERM_L  2
#define RFC1522_DLIM    "?"
#define RFC1522_DLIM_L  1
#define RFC1522_MAXW    75
#define ESPECIALS       "()<>@,;:\"/[]?.="

/*
 * Number of characters in an encoded-word that are not encoded text:
 * intro, terminator, two delimiters, the charset name S and one more
 * character (presumably the single encoding letter).
 *
 * Deliberately has no trailing semicolon so that it can be used inside
 * a larger expression.
 */
#define RFC1522_OVERHEAD(S)     (RFC1522_INIT_L + RFC1522_TERM_L +      \
                                 (2 * RFC1522_DLIM_L) + strlen(S) + 1)

/*
 * True when character C cannot appear literally in 'Q' encoded text:
 * it has the high bit set, is rejected by rfc1522_valtok(), or is '_'.
 * NOTE: evaluates C more than once.
 */
#define RFC1522_ENC_CHAR(C)     (((C) & 0x80) || !rfc1522_valtok(C)     \
                                 || (C) == '_' )
#define SPACE           ' '             /* space character */
#define ESCAPE          '\033'          /* the escape */
#define UNKNOWN_CHARSET "X-UNKNOWN"

/*
 * Hex conversion aids
 */
#define HEX_ARRAY       "0123456789ABCDEF"
#define HEX_CHAR1(C)    HEX_ARRAY[((C) & 0xf0) >> 4]
#define HEX_CHAR2(C)    HEX_ARRAY[(C) & 0xf]

/*
 * Store the two upper-case hex digits of byte C at S, advancing S by two.
 * Wrapped in do/while(0) so "C2XPAIR(c, d);" is a single statement that
 * is safe in an unbraced if/else.  NOTE: evaluates C twice.
 */
#define C2XPAIR(C, S)   do {                            \
                            *(S)++ = HEX_CHAR1(C);      \
                            *(S)++ = HEX_CHAR2(C);      \
                        } while(0)
52 int rfc1522_token PROTO((char *, int (*) PROTO((int)), char *,
53 char **));
54 int rfc1522_valtok PROTO((int));
55 int rfc1522_valenc PROTO((int));
56 int rfc1522_valid PROTO((char *, char **, char **, char **,
57 char **));
58 char *rfc1522_8bit PROTO((void *, int));
59 char *rfc1522_binary PROTO((void *, int));
60 unsigned char *rfc1522_encoded_word PROTO((unsigned char *, int, char *));
61 char *strindex PROTO((char *, int));
62 void sstrcpy PROTO((char **, char *));
63 void sstrncpy PROTO((char **, char *, int));
65 int removing_double_quotes PROTO((char *));
/*
 * Byte sequences that match_escapes() recognizes.  The list is
 * NULL-terminated.
 */
static char *known_escapes[] = {
    "(B",  "(J",  "$@",  "$B",                  /* RFC 1468 */
    "(H",
    NULL};
/* different for non-Windows */

/*
 * match_escapes - test whether esc_seq starts with one of known_escapes
 *
 * Args: esc_seq -- text to test; rfc1522_encode passes the bytes that
 *                  follow an ESCAPE character.  NULL is treated as
 *                  "no match".
 *
 * Returns: length of the matching sequence plus one (presumably so the
 *          count covers the escape character preceding esc_seq), or 0
 *          if nothing matched.
 */
int
match_escapes(esc_seq)
    char *esc_seq;
{
    char **p;
    int    n = 0;

    if(!esc_seq)
      return(0);

    for(p = known_escapes; *p; p++){
        n = (int) strlen(*p);
        if(!strncmp(esc_seq, *p, n))
          break;                                /* esc_seq begins with *p */
    }

    return(*p ? n + 1 : 0);
}
/*----------------------------------------------------------------------
     A replacement for strchr or index ...

    Returns a pointer to the first occurrence of the character
    'ch' in the specified string or NULL if it doesn't occur

    ....so we don't have to worry if it's there or not. We bring our own.
    If we really care about efficiency and think the local one is more
    efficient the local one can be used, but most of the things that take
    a long time are in the c-client and not in pine.

    As with strchr, 'ch' is converted to char before comparing (so a
    byte value of 0x80 or above matches even where char is signed), and
    searching for '\0' returns a pointer to the string's terminator.
    A NULL buffer yields NULL.
 ----*/
char *
strindex(buffer, ch)
    char *buffer;
    int   ch;
{
    char target = (char) ch;

    if(!buffer)
      return(NULL);

    do
      if(*buffer == target)
        return(buffer);
    while(*buffer++ != '\0');           /* terminator is tested too */

    return(NULL);
}
/*----------------------------------------------------------------------
    copy the source string onto the destination string returning with
    the destination string pointer at the end of the destination text

    motivation for this is to avoid twice passing over a string that's
    being appended to twice (i.e., strcpy(t, x); t += strlen(t))

    Args: d -- address of the destination pointer; on return *d points
               at the terminating '\0' just written
          s -- NUL-terminated source string

    No bounds checking is done; the caller must make sure the
    destination has room for s and its terminator.
 ----*/
void
sstrcpy(d, s)
    char **d;
    char  *s;
{
    char *dst = *d;

    while((*dst = *s++) != '\0')
      dst++;

    *d = dst;                           /* leave *d on the terminator */
}
/*
 * sstrncpy - like sstrcpy, but copy at most n characters
 *
 * Args: d -- address of the destination pointer; advanced past every
 *            non-NUL character copied
 *       s -- NUL-terminated source string
 *       n -- maximum number of characters to store; nothing is done
 *            when n <= 0
 *
 * NOTE: if s is shorter than n its terminating '\0' is stored at *d,
 * but when the count runs out first the destination is NOT terminated;
 * the caller has to tie it off.
 */
void
sstrncpy(d, s, n)
    char **d;
    char  *s;
    int    n;
{
    while(n-- > 0 && (**d = *s++) != '\0')
      (*d)++;
}
137 * rfc1522_token - scan the given source line up to the end_str making
138 * sure all subsequent chars are "valid" leaving endp
139 * a the start of the end_str.
140 * Returns: TRUE if we got a valid token, FALSE otherwise
143 rfc1522_token(s, valid, end_str, endp)
144 char *s;
145 int (*valid) PROTO((int));
146 char *end_str;
147 char **endp;
149 while(*s){
150 if((char) *s == *end_str /* test for matching end_str */
151 && ((end_str[1])
152 ? !strncmp((char *)s + 1, end_str + 1, strlen(end_str + 1))
153 : 1)){
154 *endp = s;
155 return(TRUE);
158 if(!(*valid)(*s++)) /* test for valid char */
159 break;
162 return(FALSE);
167 * rfc1522_valtok - test for valid character in the RFC 1522 encoded
168 * word's charset and encoding fields.
171 rfc1522_valtok(c)
172 int c;
174 return(!(c == SPACE || iscntrl(c & 0x7f) || strindex(ESPECIALS, c)));
179 * rfc1522_valenc - test for valid character in the RFC 1522 encoded
180 * word's encoded-text field.
183 rfc1522_valenc(c)
184 int c;
186 return(!(c == '?' || c == SPACE) && isprint((unsigned char)c));
191 * rfc1522_valid - validate the given string as to it's rfc1522-ness
194 rfc1522_valid(s, charset, enc, txt, endp)
195 char *s;
196 char **charset;
197 char **enc;
198 char **txt;
199 char **endp;
201 char *c, *e, *t, *p;
202 int rv;
204 rv = rfc1522_token(c = s+RFC1522_INIT_L, rfc1522_valtok, RFC1522_DLIM, &e)
205 && rfc1522_token(++e, rfc1522_valtok, RFC1522_DLIM, &t)
206 && rfc1522_token(++t, rfc1522_valenc, RFC1522_TERM, &p)
207 && p - s <= RFC1522_MAXW;
209 if(charset)
210 *charset = c;
212 if(enc)
213 *enc = e;
215 if(txt)
216 *txt = t;
218 if(endp)
219 *endp = p;
221 return(rv);
226 * rfc1522_encode - encode the given source string ala RFC 1522,
227 * IF NECESSARY, into the given destination buffer.
228 * Don't bother copying if it turns out encoding
229 * isn't necessary.
231 * Returns: pointer to either the destination buffer containing the
232 * encoded text, or a pointer to the source buffer if we didn't
233 * have to encode anything.
235 char *
236 rfc1522_encode(d, len, s, charset)
237 char *d;
238 size_t len; /* length of d */
239 unsigned char *s;
240 char *charset;
242 unsigned char *p, *q;
243 int n;
245 if(!s)
246 return((char *) s);
248 if(!charset)
249 charset = UNKNOWN_CHARSET;
251 /* look for a reason to encode */
252 for(p = s, n = 0; *p; p++)
253 if((*p) & 0x80){
254 n++;
256 else if(*p == RFC1522_INIT[0]
257 && !strncmp((char *) p, RFC1522_INIT, RFC1522_INIT_L)){
258 if(rfc1522_valid((char *) p, NULL, NULL, NULL, (char **) &q))
259 p = q + RFC1522_TERM_L - 1; /* advance past encoded gunk */
261 else if(*p == ESCAPE && match_escapes((char *)(p+1))){
262 n++;
265 if(n){ /* found, encoding to do */
266 char *rv = d, *t,
267 enc = (n > (2 * (p - s)) / 3) ? 'B' : 'Q';
269 while(*s){
270 if(d-rv < len-1-(RFC1522_INIT_L+2*RFC1522_DLIM_L+1)){
271 sstrcpy(&d, RFC1522_INIT); /* insert intro header, */
272 sstrcpy(&d, charset); /* character set tag, */
273 sstrcpy(&d, RFC1522_DLIM); /* and encoding flavor */
274 *d++ = enc;
275 sstrcpy(&d, RFC1522_DLIM);
279 * feed lines to encoder such that they're guaranteed
280 * less than RFC1522_MAXW.
282 p = rfc1522_encoded_word(s, enc, charset);
283 if(enc == 'B') /* insert encoded data */
284 sstrncpy(&d, t = rfc1522_binary(s, p - s), len-1-(d-rv));
285 else /* 'Q' encoding */
286 sstrncpy(&d, t = rfc1522_8bit(s, p - s), len-1-(d-rv));
288 sstrncpy(&d, RFC1522_TERM, len-1-(d-rv)); /* insert terminator */
289 fs_give((void **) &t);
290 if(*p) /* more src string follows */
291 sstrncpy(&d, "\015\012 ", len-1-(d-rv)); /* insert cont. line */
293 s = p; /* advance s */
296 rv[len-1] = '\0';
297 return(rv);
299 else
300 return((char *) s); /* no work for us here */
306 * rfc1522_encoded_word -- cut given string into max length encoded word
308 * Return: pointer into 's' such that the encoded 's' is no greater
309 * than RFC1522_MAXW
311 * NOTE: this line break code is NOT cognizant of any SI/SO
312 * charset requirements nor similar strategies using escape
313 * codes. Hopefully this will matter little and such
314 * representation strategies don't also include 8bit chars.
316 unsigned char *
317 rfc1522_encoded_word(s, enc, charset)
318 unsigned char *s;
319 int enc;
320 char *charset;
322 int goal = RFC1522_MAXW - RFC1522_OVERHEAD(charset);
324 if(enc == 'B') /* base64 encode */
325 for(goal = ((goal / 4) * 3) - 2; goal && *s; goal--, s++)
327 else /* special 'Q' encoding */
328 for(; goal && *s; s++)
329 if((goal -= RFC1522_ENC_CHAR(*s) ? 3 : 1) < 0)
330 break;
332 return(s);
338 * rfc1522_8bit -- apply RFC 1522 'Q' encoding to the given 8bit buffer
340 * Return: alloc'd buffer containing encoded string
342 char *
343 rfc1522_8bit(src, slen)
344 void *src;
345 int slen;
347 char *ret = (char *) fs_get ((size_t) (3*slen + 2));
348 char *d = ret;
349 unsigned char c;
350 unsigned char *s = (unsigned char *) src;
352 while (slen--) { /* for each character */
353 if (((c = *s++) == '\015') && (*s == '\012') && slen) {
354 *d++ = '\015'; /* true line break */
355 *d++ = *s++;
356 slen--;
358 else if(c == SPACE){ /* special encoding case */
359 *d++ = '_';
361 else if(RFC1522_ENC_CHAR(c)){
362 *d++ = '='; /* quote character */
363 C2XPAIR(c, d);
365 else
366 *d++ = (char) c; /* ordinary character */
369 *d = '\0'; /* tie off destination */
370 return(ret);
/*
 * rfc1522_binary -- apply RFC 1522 'B' encoding to the given 8bit buffer
 *
 * Args: src  -- bytes to encode
 *       srcl -- number of bytes at src; negative is treated as 0
 *
 * Every group of up to three input bytes yields four output characters,
 * with '=' padding a short final group.  No line breaks are inserted.
 *
 * Return: alloc'd buffer containing encoded string; obtained from
 *         fs_get, so the caller releases it with fs_give
 */
char *
rfc1522_binary (src, srcl)
    void *src;
    int   srcl;
{
    static char *v =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    unsigned char *s = (unsigned char *) src;
    char *ret, *d;

    if(srcl < 0)
      srcl = 0;                 /* else the loop below would never end */

    d = ret = (char *) fs_get ((((size_t) srcl + 2) / 3) * 4 + 1);

    /*
     * srcl is counted down inside the expressions below, as each further
     * byte of the tuplet is found to exist, so s[1] and s[2] are only
     * read when they are part of the input.
     */
    for (; srcl; s += 3) {      /* process tuplets */
                                /* byte 1: high 6 bits (1) */
        *d++ = v[s[0] >> 2];
                                /* byte 2: low 2 bits (1), high 4 bits (2) */
        *d++ = v[((s[0] << 4) + (--srcl ? (s[1] >> 4) : 0)) & 0x3f];
                                /* byte 3: low 4 bits (2), high 2 bits (3) */
        *d++ = srcl ? v[((s[1] << 2) + (--srcl ? (s[2] >> 6) :0)) & 0x3f] :'=';
                                /* byte 4: low 6 bits (3) */
        *d++ = srcl ? v[s[2] & 0x3f] : '=';
        if(srcl)
          srcl--;               /* count third character if processed */
    }

    *d = '\0';                  /* tie off string */
    return(ret);                /* return the resulting string */
}
/*
 * Function to parse the given string into two space-delimited fields
 * Quotes may be used to surround labels or values with spaces in them.
 * Backslash negates the special meaning of a quote.
 * Unescaping of backslashes only happens if the pair member is quoted,
 * this provides for backwards compatibility.
 *
 * Args -- string -- the source string
 *          label -- the first half of the string, a return value
 *          value -- the last half of the string, a return value
 *        firstws -- if set, the halves are delimited by the first unquoted
 *                   whitespace, else by the last unquoted whitespace
 * strip_internal_label_quotes -- unescaped quotes in the middle of the label
 *                                are removed.  This is useful for vars
 *                                like display-filters and url-viewers
 *                                which may require quoting of an arg
 *                                inside of a _TOKEN_.
 *
 * On return *label and *value are either NULL or freshly allocated
 * strings the caller must release.  *label is NULL when there is no
 * unquoted whitespace in string, or when the text before the delimiter
 * is empty once trimmed and unquoted.
 */
void
get_pair(string, label, value, firstws, strip_internal_label_quotes)
    char *string, **label, **value;
    int   firstws;
    int   strip_internal_label_quotes;
{
    char *p, *q, *tmp, *token = NULL;
    int   quoted = 0;

    *label = *value = NULL;

    /*
     * This for loop just finds the beginning of the value.  If firstws
     * is set, then it begins after the first whitespace.  Otherwise, it begins
     * after the last whitespace.  Quoted whitespace doesn't count as
     * whitespace.  If there is no unquoted whitespace, then there is no
     * label, there's just a value.
     */
    for(p = string; p && *p;){
        if(*p == '"')                           /* quoted label? */
          quoted = (quoted) ? 0 : 1;

        if(*p == '\\' && *(p+1) == '"')         /* escaped quote? */
          p++;                                  /* skip it... */

        if(isspace((unsigned char)*p) && !quoted){      /* if space, */
            while(*++p && isspace((unsigned char)*p))   /* move past it */
              ;

            if(!firstws || !token)
              token = p;                        /* remember start of text */
        }
        else
          p++;
    }

    if(token){                                  /* copy label */
        *label = p = (char *)fs_get(((token - string) + 1) * sizeof(char));

        /* make a copy of the string */
        tmp = (char *)fs_get(((token - string) + 1) * sizeof(char));
        strncpy(tmp, string, token - string);
        tmp[token-string] = '\0';

        removing_leading_and_trailing_white_space(tmp);
        quoted = removing_double_quotes(tmp);

        for(q = tmp; *q; q++){
            if(quoted && *q == '\\' && (*(q+1) == '"' || *(q+1) == '\\'))
              *p++ = *++q;                      /* drop the backslash */
            else if(!(strip_internal_label_quotes && *q == '"'))
              *p++ = *q;
        }

        *p = '\0';                              /* tie off label */
        fs_give((void **)&tmp);

        /*
         * Look at the label's first character, not at the pointer
         * (which is never NULL here), so an empty label is released.
         * Assumes fs_give also resets *label to NULL -- TODO confirm.
         */
        if(**label == '\0')
          fs_give((void **)label);
    }
    else
      token = string;

    if(token){                                  /* copy value */
        *value = p = (char *)fs_get((strlen(token) + 1) * sizeof(char));

        tmp = cpystr(token);
        removing_leading_and_trailing_white_space(tmp);
        quoted = removing_double_quotes(tmp);

        for(q = tmp; *q ; q++){
            if(quoted && *q == '\\' && (*(q+1) == '"' || *(q+1) == '\\'))
              *p++ = *++q;                      /* drop the backslash */
            else
              *p++ = *q;
        }

        *p = '\0';                              /* tie off value */
        fs_give((void **)&tmp);
    }
}
/*
 * Strip leading and trailing white space from string, in place.
 *
 * Args: string -- NUL-terminated string to trim; NULL is ignored
 *
 * The remaining text is shifted down to the start of the buffer.  A
 * string holding nothing but white space becomes the empty string.
 */
void
removing_leading_and_trailing_white_space(string)
    char *string;
{
    char *src, *dst, *trail = NULL;

    if(!string)
      return;

    /* find the first non-blank */
    for(src = string; *src && isspace((unsigned char) *src); src++)
      ;

    /*
     * copy back from there, remembering where the current run of
     * white space began so it can be cut off if it reaches the end
     */
    for(dst = string; (*dst = *src) != '\0'; dst++, src++){
        if(!isspace((unsigned char) *dst))
          trail = NULL;
        else if(!trail)
          trail = dst;
    }

    if(trail)
      *trail = '\0';
}
/*----------------------------------------------------------------------
     Remove one set of double quotes surrounding string in place
     Returns 1 if quotes were removed

    Args: string -- string to remove quotes from

    Nothing is changed, and 0 is returned, unless the string is at
    least two characters long and both begins and ends with '"'.
    A NULL string also yields 0.
 ----*/
int
removing_double_quotes(string)
    char *string;
{
    size_t len;

    if(!string || string[0] != '"' || string[1] == '\0')
      return(0);

    len = strlen(string);               /* at least 2 here */
    if(string[len - 1] != '"')
      return(0);

    string[len - 1] = '\0';             /* drop the closing quote */

    /* slide the text between the quotes down over the opening one */
    memmove(string, string + 1, len - 2);
    string[len - 2] = '\0';

    return(1);
}