get_message() passes two tests
[libmime.git] / message.myr
blob32021a07774b9fbe91d2ac5e1d91ea005134e4a5
1 use std
2 use bio
3 use date
5 use "rfc2047"
6 use "util"
8 /*
9    This is based on RFC 5322 (for handling headers) and RFCs 2045,
10    2046 (for the structure of MIME). The parameters are decoded as
11    per RFC 2231.
12  */
pkg mime =
	/* A complete parsed message: the backing bytes plus the top-level entity. */
	type message = struct
		/*
		   Master raw data; all sub-objects are slices of
		   this. The goal is that, to free a message, freeing
		   raw should deal with all the contents
		 */
		raw : byte[:]

		/* The top-level entity; its own raw field is a slice of raw above */
		contents : entity
	;;

	/* The body of an entity: either opaque bytes or a list of sub-entities. */
	type mimebody = union
		/* Perhaps plaintext, or base64 encoded application/pdf */
		`Single byte[:]

		/* Text and attachment, plain/HTML, &c */
		`Multipart entity[:]
	;;

	/* One MIME entity: a header block together with the body it describes. */
	type entity = struct
		/* A slice of a message.raw somewhere up above */
		raw : byte[:]

		/*
		   NOT sourced from raw, must be freed separately. Keys
		   are lower-cased field names; each key maps to every
		   value seen for that field.
		 */
		headers : std.htab(byte[:], byte[:][:])#

		/* Potentially recursive definition */
		body : mimebody

		/*
		   Perhaps 'filename' from Content-Disposition, or
		   'name' from Content-Type. Must be freed separately.
		   When both are present, the filename wins.
		 */
		name : std.option(byte[:])

		/*
		   Memory for the following resides in the headers'
		   values, or else is static (so don't free it).
		 */

		/* Only the "type/subtype" bit */
		contenttype : byte[:]
	;;

	/*
	   Given raw, assume it represents a message, or the beginning
	   of one, and extract the headers into key/value.

	   The values of the output are multiple in case a message
	   has duplicate fields (e.g. "Subject: foo" and "Subject:
	   bar"). Behavior is unspecified (RFC 2822, 4.5).

	   On failure the error is a heap-allocated description that
	   the caller should free.
	 */
	const get_headers : (raw : byte[:] -> std.result(std.htab(byte[:], byte[:][:])#, byte[:]))

	/*
	   Given raw, like "foo: bar; a=\"b\"; c*=utf-8''whatever",
	   parse the params as per RFC 2231
	 */
	const get_params : (raw : byte[:] -> std.result(std.htab(byte[:], byte[:])#, byte[:]))

	/*
	   Given raw, parse it into a MIME message. This takes
	   ownership of raw if successful. On failure raw still
	   belongs to the caller, and the error is a heap-allocated
	   description.
	 */
	const get_message : (raw : byte[:] -> std.result(message#, byte[:]))

	/*
	   Free all memory used by m, including the raw data used
	   to construct it.
	 */
	const free_message : (m : message# -> void)

	/*
	   Parse the date format of RFC 5322. On failure the error
	   is a heap-allocated description quoting the input.
	 */
	const parse_rfc5322_date : (s : byte[:] -> std.result(date.instant, byte[:]))
;;
/*
   Parse the header block at the start of raw into a table that maps
   each lower-cased field name to every value seen for that field.
   Parsing stops at the first empty line.

   On success the caller owns the table, its keys and its values. On
   failure everything is released and the error is a heap-allocated
   message.
 */
const get_headers = {raw : byte[:]
	var h : std.htab(byte[:], byte[:][:])# = std.mkht()
	var err : std.strbuf# = std.mksb()
	var cline : std.strbuf# = std.mksb()
	var split : byte[:][:] = std.strsplit(raw, "\r\n")
	for l : split
		if l.len > 0 && is_WSP(l[0])
			/*
			   This is a wrapped header, but we can't
			   unwrap it now, because it might be a RFC
			   2047 encodedword.  For those, CRLF+' '
			   should be deleted, but for bog standard
			   RFC 5322, CRLF+' ' should be treated as
			   ' '.
			 */
			std.sbfmt(cline, "\n{}", l)
		elif l.len == 0
			/*
			   Done with headers. Whatever is pending in
			   cline is flushed once, after the loop.
			 */
			break
		elif all_WSP(l)
			/*
			   NOTE(review): an all-whitespace line starts
			   with whitespace, so the continuation branch
			   above always wins and this branch cannot
			   currently be reached. Left as-is so that
			   accepted input does not change.
			 */
			std.sbfmt(err, "all-whitespace line in headers")
			break
		else
			/* A new field starts here, so the pending one is complete */
			if cline.len > 0
				match handle_header(h, std.sbpeek(cline))
				| `std.Ok void:
				| `std.Err e:
					std.sbfmt(err, "malformed headers: {}", e)
					std.slfree(e)
					break
				;;
			;;

			std.sbtrim(cline, 0)
			std.sbfmt(cline, "{}", l)
		;;
	;;
	std.slfree(split)
	split = [][:]

	/*
	   Flush the final pending header, but only if nothing has
	   failed so far: after an error cline still holds the line
	   that was rejected, and parsing it again would only repeat
	   the same message.
	 */
	if err.len == 0 && !all_WSP(std.sbpeek(cline))
		match handle_header(h, std.sbpeek(cline))
		| `std.Ok void:
		| `std.Err e:
			std.sbfmt(err, "malformed headers: {}", e)
			std.slfree(e)
		;;
	;;
	std.sbfree(cline)

	if err.len != 0
		free_all_headers(h)
		std.htfree(h)
		-> `std.Err std.sbfin(err)
	;;

	/*
	   Now we've slurped all the raw data in, but perhaps it
	   needs to be decoded. Only worth doing for a table that
	   is actually going to be returned.
	 */
	decode_all(h)
	std.sbfree(err)
	-> `std.Ok (h)
}
/* True for horizontal tab, line feed, carriage return and space. */
const is_WSP_or_nl = { b : byte
	match (b : uint8)
	| 0x09:	-> true
	| 0x0a:	-> true
	| 0x0d:	-> true
	| 0x20:	-> true
	| _:	-> false
	;;
}
/* True for the two WSP bytes: horizontal tab and space. */
const is_WSP = { b : byte
	-> b == (0x20 : byte) || b == (0x09 : byte)
}
/*
   True when every byte of l is a space or a horizontal tab. An
   empty slice counts as all-whitespace.
 */
const all_WSP = { l : byte[:]
	for var i = 0; i < l.len; ++i
		match (l[i] : uint8)
		| 0x20:
		| 0x09:
		| _:	-> false
		;;
	;;

	-> true
}
/*
   Split one unfolded header line "Name: value" at the first ':' and
   record it in h. The name is lower-cased; the value has its leading
   spaces and tabs dropped. Both are fresh allocations owned by the
   table. A repeated name appends to the values already stored.

   Returns Err with a heap-allocated message when l has no ':'.
 */
const handle_header = { h : std.htab(byte[:], byte[:][:])#, l : byte[:]
	match std.strfind(l, ":")
	| `std.None:
		/*
		   Build the message only when it is needed; a buffer
		   made up front would leak on every good header.
		 */
		-> `std.Err std.fmt("\"{}\" has no ':'", l)
	| `std.Some j:
		var k : byte[:] = str_to_lower(l[0:j])
		var v : byte[:] = [][:]

		/* The value starts at the first non-WSP byte after the colon */
		for j++; j < l.len; j++
			if !is_WSP(l[j])
				v = std.sldup(l[j:])
				break
			;;
		;;

		match std.htget(h, k)
		| `std.None:
			std.htput(h, k, std.sldup([ v ][:]))
		| `std.Some t:
			/*
			   slpush may move the slice, so the table
			   entry has to be written back.

			   NOTE(review): the table already has an
			   equal key here, so either that key or this
			   fresh k ends up unreferenced -- confirm
			   which one htput keeps and free the other.
			 */
			std.slpush(&t, v)
			std.htput(h, k, t)
		;;
	;;

	-> `std.Ok void
}
/*
   The XXX_all functions are workarounds for a current awkwardness:
   generic functions can't use generic iterators that don't use the
   same genericity that the enclosing function uses. If you're
   reading this, Ori has probably fixed it by now.
 */

/*
   Replace every header value in h with the result of passing it
   through utf8_from_encodedword, freeing the value it replaces.
 */
const decode_all = {h : std.htab(byte[:], byte[:][:])#
	/*
	   This might need to become more complicated; encodedwords
	   are only allowed to appear in certain locations.
	 */
	for (name, vals) : std.byhtkeyvals(h)
		var i = 0
		while i < vals.len
			/* vals shares its storage with the table, so this write sticks */
			var old = vals[i]
			vals[i] = utf8_from_encodedword(old)
			std.slfree(old)
			i++
		;;
	;;
}
/*
   Free everything a header table points at: each key, each value
   string, and the slice that holds a key's values. The table itself
   is left for the caller to release with std.htfree.
 */
const free_all_headers = {h : std.htab(byte[:], byte[:][:])#
	for (k, v) : std.byhtkeyvals(h)
		for vv : v
			std.slfree(vv)
		;;
		/*
		   The list of values is its own allocation (see
		   handle_header), so it has to go as well.
		 */
		std.slfree(v)
		std.slfree(k)
	;;
}
/*
   Free every key and value held by a parameter table. The table
   itself is left for the caller to release with std.htfree.
 */
const free_all_params = {h : std.htab(byte[:], byte[:])#
	for (name, value) : std.byhtkeyvals(h)
		std.slfree(name)
		std.slfree(value)
	;;
}
/*
   Parse the value of an RFC 5322 "Date:" field.

   Runs of space, tab, CR and LF are folded into a single space, an
   optional leading "Xxx, " day-of-week is skipped, and the rest is
   tried with seconds, without seconds, and finally with one of the
   obsolete North American zone names replaced by its numeric offset.

   Returns Err with a heap-allocated message quoting s when s holds a
   byte outside printable ASCII and whitespace, or when no format
   matches. s itself is never freed.
 */
const parse_rfc5322_date = { s : byte[:]
	/* First, fold whitespace */
	var s2 : byte[:] = std.slalloc(0)
	var last_was_ws : bool = false
	for b : s
		if is_WSP_or_nl(b)
			/*
			   Whitespace has to be recognised before the
			   control-character check below, or a folded
			   header (CRLF + space) could never parse.
			 */
			if !last_was_ws
				std.slpush(&s2, (' ' : byte))
			;;
			last_was_ws = true
		elif b < 0x20 || b > 0x7e
			/*
			   Free the buffer as it is now: slpush may
			   have moved it since it was first allocated.
			 */
			std.slfree(s2)
			-> `std.Err std.fmt("unparsable date \"{}\"", s)
		else
			std.slpush(&s2, b)
			last_was_ws = false
		;;
	;;

	/* s2 gets re-sliced below; remember the allocation itself */
	var free_this : byte[:] = s2

	/* skip [ day-of-week "," ] */
	if s2.len > 5 && s2[3] == (',' : byte)
		s2 = s2[5:]
	;;

	/* try with seconds */
	match date.parsefmt("%e %b %Y %H:%M:%S %z", s2)
	| `std.Err e:
	| `std.Ok i:
		std.slfree(free_this)
		-> `std.Ok i
	;;

	/* try without seconds */
	match date.parsefmt("%e %b %Y %H:%M %z", s2)
	| `std.Err e:
	| `std.Ok i:
		std.slfree(free_this)
		-> `std.Ok i
	;;

	/* perhaps the time zone is in obsolete format? */
	if s2.len > 3
		for (a, b) : [
			("EDT", "-0400"),
			("EST", "-0500"),
			("CDT", "-0500"),
			("CST", "-0600"),
			("MDT", "-0600"),
			("MST", "-0700"),
			("PDT", "-0700"),
			("PST", "-0800"),
		][:]
			if std.eq(a, s2[s2.len - a.len:])
				/*
				   Drop the space that preceded the zone
				   name, so that exactly one separates the
				   time from the offset, as in the formats
				   above.
				 */
				var prefix = s2[0:s2.len - a.len]
				if prefix.len > 0 && prefix[prefix.len - 1] == (' ' : byte)
					prefix = prefix[:prefix.len - 1]
				;;

				var t = std.fmt("{} {}", prefix, b)
				/*
				   NOTE(review): this uses %Z where the
				   attempts above use %z, although t now
				   ends in a numeric offset -- confirm
				   which one parsefmt expects here.
				 */
				match date.parsefmt("%e %b %Y %H:%M:%S %Z", t)
				| `std.Err e: std.slfree(t)
				| `std.Ok i:
					std.slfree(t)
					std.slfree(free_this)
					-> `std.Ok i
				;;
			;;
		;;
	;;

	std.slfree(free_this)
	-> `std.Err std.fmt("unparsable date \"{}\"", s)
}
/*
   Return a freshly allocated copy of s with the ASCII letters 'A'
   through 'Z' lower-cased. All other bytes are copied unchanged.
   The caller frees the result.
 */
const str_to_lower = { s : byte[:]
	var out : byte[:] = std.sldup(s)
	for var i = 0; i < out.len; ++i
		if out[i] >= ('A' : byte) && out[i] <= ('Z' : byte)
			out[i] = out[i] - ('A' : byte) + ('a' : byte)
		;;
	;;

	-> out
}
/*
   Parse raw as a complete MIME message. On success the returned
   message refers to raw (it is not copied). On failure the error
   from get_entity is handed back as a heap-allocated string.
 */
const get_message = {raw : byte[:]
	match get_entity(raw, false)
	| `std.Ok top:
		-> `std.Ok std.mk([.raw = raw, .contents = top])
	| `std.Err e:
		/* Hand back a copy so the callee's buffer can be released here */
		var msg : byte[:] = std.fmt("{}", e)
		std.slfree(e)
		-> `std.Err msg
	;;
}
/*
   Release what an already-parsed entity owns (headers, name and,
   recursively, sub-entities). Used for children that will never be
   handed to the caller because a later sibling failed to parse.
 */
const discard_entity = {e : entity
	free_all_headers(e.headers)
	std.htfree(e.headers)
	match e.name
	| `std.Some n: std.slfree(n)
	| `std.None:
	;;
	match e.body
	| `Multipart parts:
		for p : parts
			discard_entity(p)
		;;
		std.slfree(parts)
	| `Single b:
	;;
}

/*
   Parse raw as one MIME entity: headers, then a body that is either
   opaque bytes or, for multipart types, a list of recursively parsed
   sub-entities. Every body is a slice of raw; nothing is copied
   except the headers and the name.

   in_digest selects the default content type when the entity has no
   Content-Type header: "message/rfc822" inside a multipart/digest,
   "text/plain" everywhere else.

   On failure, everything allocated here is released and the error is
   a heap-allocated message.
 */
const get_entity = {raw : byte[:], in_digest : bool
	var err : std.strbuf# = std.mksb()
	var body : byte[:] = raw
	var multipart_body : byte[:] = [][:]
	var boundary : byte[:] = [][:]
	var params : std.htab(byte[:], byte[:])# = std.mkht()
	var result : entity = [
		.raw = [][:],
		.headers = std.mkht(),
		.body = `Single [][:],
		.name = `std.None,
		.contenttype = [][:],
	]
	var child_in_digest : bool = false
	var children : entity[:] = [][:]

	result.raw = raw

	/* First, figure out the headers for the whole message. */
	match get_headers(raw)
	| `std.Ok h:
		std.htfree(result.headers)
		result.headers = h
	| `std.Err e:
		std.sbfmt(err, "malformed message: {}", e)
		std.slfree(e)
		goto done
	;;

	/*
	   Special case: the content type will tell us whether to
	   expect something mixed or not, and perhaps a nice name
	 */
	match std.htget(result.headers, "content-type")
	| `std.None:
		/*
		   Default as per RFC 2045, section 5.2, but also
		   RFC 2046, section 5.1.1
		 */
		if in_digest
			result.contenttype = "message/rfc822"
		else
			result.contenttype = "text/plain"
		;;
	| `std.Some s:
		/* s[0] is something like "foo/bar baz=quux; charset=utf-13" */
		result.contenttype = s[0]
		for var j = 0; j < result.contenttype.len; ++j
			if is_WSP(result.contenttype[j]) || result.contenttype[j] == (';' : byte)
				result.contenttype = result.contenttype[0:j]
				break
			;;
		;;

		if std.eq(result.contenttype, "multipart/digest")
			child_in_digest = true
		;;

		match get_params(s[0])
		| `std.Err e:
			std.sbfmt(err, "bad header “{}”: {}", s[0], e)
			std.slfree(e)
			goto done
		| `std.Ok h:
			std.htfree(params)
			params = h
			match std.htget(params, "name")
			| `std.None:
			| `std.Some n: result.name = `std.Some std.sldup(n)
			;;
		;;
	;;

	/*
	   Special case: the content-disposition may have a filename,
	   which is even better than a name
	 */
	match std.htget(result.headers, "content-disposition")
	| `std.None:
	| `std.Some s:
		match get_params(s[0])
		| `std.Err e:
			std.sbfmt(err, "bad header “{}”: {}", s[0], e)
			std.slfree(e)
			goto done
		| `std.Ok h:
			match std.htget(h, "filename")
			| `std.None:
			| `std.Some new_name:
				match result.name
				| `std.None:
				| `std.Some old_name: std.slfree(old_name)
				;;
				result.name = `std.Some std.sldup(new_name)
			;;
			free_all_params(h)
			std.htfree(h)
		;;
	;;

	/*
	   TODO: this only really works if content-transfer-encoding
	   is inline. Fix that.
	 */

	/* The header bit ends at the first CRLFCRLF */
	match std.strfind(raw, "\r\n\r\n")
	| `std.None: body = [][:]
	| `std.Some j:
		body = raw[j + 4:]
		/*
		   A boundary delimiter is CRLF "--" boundary. When
		   there is no preamble, the CRLF of the very first
		   delimiter is the second half of the blank line that
		   ended the headers, so the multipart scan has to
		   start two bytes earlier or it would skip the first
		   part entirely.
		 */
		multipart_body = raw[j + 2:]
	;;

	if startswith(result.contenttype, "multipart/")
		match std.htget(params, "boundary")
		| `std.None:
			std.sbfmt(err, "multipart type, but no boundary")
			goto done
		| `std.Some b:
			boundary = std.fmt("\r\n--{}", b)
			var start : std.size = 0
			var end : std.size = 0

			/* Skip preamble; see RFC 2046 section 5.1.1 */
			match std.strfind(multipart_body, boundary)
			| `std.None:
				std.sbfmt(err, "multipart type, boundary not present")
				goto done
			| `std.Some j: start = j
			;;

			/* Loop through all sub-parts (RFC 2046 for all this) */
			while true
				/*
				   We have found a boundary: it's at
				   multipart_body[start]. We want to jump
				   to the end of the boundary, then
				   eat all linear whitespace. If
				   it is followed by CRLF, then we
				   start a new segment. If it is
				   followed by "--", we're done
				   with the whole thing. Otherwise,
				   error (the boundary delimiter
				   has appeared in the body).
				 */
				start = start + boundary.len
				while start < multipart_body.len && is_WSP(multipart_body[start])
					start++
				;;

				if start + 2 > multipart_body.len
					std.sbfmt(err, "multipart boundary ends abruptly")
					goto done
				;;

				match ((multipart_body[start] : char), (multipart_body[start+1] : char))
				| ('-', '-'):
					/*
					   This is the distinguished
					   delimiter. We're done.
					 */
					break
				| ('\r', '\n'):
					/* There is more to come. */
					start = start + 2
					match std.strfind(multipart_body[start:], boundary)
					| `std.None:
						std.sbfmt(err, "unterminated multipart")
						goto done
					| `std.Some j: end = start + j
					;;

					/* Now multipart_body[start:end] is something worthy of parsing */
					match get_entity(multipart_body[start:end], child_in_digest)
					| `std.Ok ent:
						std.slpush(&children, ent)
						start = end
					| `std.Err e:
						std.sbfmt(err, "malformed body part: {}", e)
						std.slfree(e)
						goto done
					;;
				| (_, _):
					std.sbfmt(err, "multipart boundary has appeared in body")
					goto done
				;;
			;;
		;;
		result.body = `Multipart children
	elif startswith(result.contenttype, "message")
		/*
		   Having never seen this in the wild, I'm not sure
		   how I want it handled. For now, let's just slurp
		   it raw.
		 */
		result.body = `Single body
	else
		result.body = `Single body
	;;

:done
	std.slfree(boundary)
	boundary = [][:]
	free_all_params(params)
	std.htfree(params)
	match err.len
	| 0:
		std.sbfree(err)
		-> `std.Ok result
	| _:
		/*
		   Nothing built so far reaches the caller, so release
		   it all: the parts parsed before the failure, the
		   name, and the headers.
		 */
		for c : children
			discard_entity(c)
		;;
		std.slfree(children)
		match result.name
		| `std.Some n: std.slfree(n)
		| `std.None:
		;;
		free_all_headers(result.headers)
		std.htfree(result.headers)
		-> `std.Err std.sbfin(err)
	;;
}
/* True when the first prefix.len bytes of s are exactly prefix. */
const startswith = {s : byte[:], prefix : byte[:]
	/* The length test comes first so the slice below is always in bounds */
	-> prefix.len <= s.len && std.eq(s[:prefix.len], prefix)
}
/*
   States of the parameter scanner in get_params. Each one names
   what the scanner has just consumed or is currently in the middle
   of.
 */
type rfc2231_state = union
	/* Start state; skipping whitespace until an attribute name begins */
	`Just_saw_semicolon
	/* Inside an attribute name, up to '*' or '=' */
	`Reading_attribute
	/* After '*': next is either '=' or the digits of a section number */
	`Just_saw_asterisk
	/* Inside the digits of a section number */
	`Reading_section
	/* After '=': decides which kind of value follows */
	`Just_saw_equals
	/* Inside an unquoted value, up to ';' */
	`Reading_boring_value
	/* Inside an extended value, which may contain %XX octets */
	`Reading_encoded_value
	/* Inside a double-quoted value */
	`Reading_quoted_value
	/* A parameter has just been stored */
	`Finished_a_param
;;
624 /* */
625 const get_params = {raw : byte[:]
626         var err : std.strbuf# = std.mksb()
627         var params : std.htab(byte[:], byte[:])# = std.mkht()
628         var keys_with_continuations : byte[:][:] = [][:]
629         var keys_needing_decoding : byte[:][:] = [][:]
630         var j : std.size = 0
632         /*
633            Our state machine isn't completely pure, we need a few
634            variables to guide the transitions.
635          */
636         var state : rfc2231_state = `Just_saw_semicolon
637         var is_sectioned : bool = false
638         var is_initial_section : bool = false
639         var is_extended : bool = false
640         var attr_start : std.size = 0
641         var attr : byte[:] = [][:]
642         var attr_sans_asterisk : byte[:] = [][:]
643         var section_start : std.size = 0
644         var value_start : std.size = 0
645         var quoted_buf : std.strbuf# = std.mksb()
647         match std.strfind(raw, ";")
648         | `std.None:
649                 raw = [][:]
650                 goto done
651         | `std.Some k: j = k
652         ;;
654         /* Let's tack an extra ";" onto raw just to make cleaning out params easier */
655         raw = std.fmt("{};", raw)
657         while j + 1 < raw.len
658                 j++
659                 var c : char = (raw[j] : char)
660                 match state
661                 | `Just_saw_semicolon:
662                         if is_WSP_or_nl(raw[j])
663                                 continue
664                         elif is_attribute_char(raw[j])
665                                 state = `Reading_attribute
666                                 attr_start = j
667                         else
668                                 std.sbfmt(err, "illegal byte in attribute")
669                                 break
670                         ;;
671                 | `Reading_attribute:
672                         if c == '*'
673                                 state = `Just_saw_asterisk
674                                 attr_sans_asterisk = raw[attr_start:j]
675                         elif c == '='
676                                 attr = raw[attr_start:j]
677                                 attr_sans_asterisk = raw[attr_start:j]
678                                 state = `Just_saw_equals
679                         elif is_attribute_char(raw[j])
680                                 continue
681                         else
682                                 std.sbfmt(err, "illegal byte in attribute")
683                                 break
684                         ;;
685                 | `Just_saw_asterisk:
686                         if c == '='
687                                 attr = raw[attr_start:j]
688                                 is_extended = true
689                                 state = `Just_saw_equals
690                         elif raw[j] >= ('0' : byte) && raw[j] <= ('9' : byte)
691                                 is_sectioned = true
692                                 section_start = j
693                                 state = `Reading_section
694                         else
695                                 std.sbfmt(err, "illegal byte in attribute after '*'")
696                                 break
697                         ;;
698                 | `Reading_section:
699                         if c == '='
700                                 is_initial_section = std.eq(raw[section_start:j], "0")
701                                 attr = raw[attr_start:j]
702                                 state = `Just_saw_equals
703                         elif raw[j] >= ('0' : byte) && raw[j] <= ('9' : byte)
704                                 continue
705                         else
706                                 std.sbfmt(err, "illegal byte in attribute after '*'")
707                                 break
708                         ;;
709                 | `Just_saw_equals:
710                         if is_extended && (!is_sectioned || is_initial_section)
711                                 match std.strfind(raw[j:], "'")
712                                 | `std.None:
713                                         std.sbfmt(err, "unterminated charset")
714                                         break
715                                 | `std.Some k:
716                                         k += j
717                                         if !std.eq(raw[j:k], "utf-8") && !std.eq(raw[j:k], "us-ascii")
718                                                 std.sbfmt(err, "unsupported charset {}", raw[j:k])
719                                                 break
720                                         ;;
721                                         j = k + 1
722                                 ;;
724                                 match std.strfind(raw[j:], "'")
725                                 | `std.None:
726                                         std.sbfmt(err, "unterminated language")
727                                         break
728                                 | `std.Some k:
729                                         /* Completely ignore language. */
730                                         j = j + k
731                                         state = `Reading_encoded_value
732                                         value_start = j + 1
733                                 ;;        
734                                 
735                         else
736                                 if c == '"'
737                                         state = `Reading_quoted_value
738                                         value_start = j + 1
739                                 else
740                                         state = `Reading_boring_value
741                                         value_start = j
742                                 ;;
743                         ;;
744                 | `Reading_boring_value:
745                         if c == ';'
746                                 var klower : byte[:] = str_to_lower(attr)
747                                 if std.hthas(params, klower)
748                                         std.sbfmt(err, "duplicate attribute “{}”", klower)
749                                         std.slfree(klower)
750                                         break
751                                 ;;
752                                 std.htput(params, klower, std.sldup(raw[value_start:j]))
753                                 if is_extended
754                                         ensure_in(&keys_needing_decoding, attr)
755                                 ;;
756                                 if is_sectioned
757                                         var q : byte[:] = [][:]
758                                         if is_extended
759                                                 q = std.fmt("{}*", attr_sans_asterisk)
760                                         else
761                                                 q = std.sldup(attr_sans_asterisk)
762                                         ;;
763                                         ensure_in(&keys_with_continuations, q)
764                                         std.slfree(q)
765                                 ;;
766                                 state = `Finished_a_param
767                                 j--
768                         elif is_token_char(raw[j])
769                                 continue
770                         else
771                                 std.sbfmt(err, "illegal character in param value")
772                                 break
773                         ;;
774                 | `Reading_encoded_value:
775                         if c == ';'
776                                 var klower : byte[:] = str_to_lower(attr)
777                                 if std.hthas(params, klower)
778                                         std.sbfmt(err, "duplicate attribute “{}”", klower)
779                                         std.slfree(klower)
780                                         break
781                                 ;;
782                                 std.htput(params, klower, std.sldup(raw[value_start:j]))
783                                 ensure_in(&keys_needing_decoding, attr)
784                                 if is_sectioned
785                                         var q : byte[:] = std.fmt("{}*", attr_sans_asterisk)
786                                         ensure_in(&keys_with_continuations, q)
787                                         std.slfree(q)
788                                 ;;
789                                 state = `Finished_a_param
790                                 j--
791                         elif c == '%'
792                                 if j + 2 >= raw.len
793                                         std.sbfmt(err, "extended octet ends prematurely")
794                                         break
795                                 ;;
797                                 if !is_octet_char(raw[j+1]) || !is_octet_char(raw[j+2])
798                                         std.sbfmt(err, "illegal byte in extended octet")
799                                         break
800                                 ;;
801                                 j = j + 2
802                         elif is_attribute_char(raw[j])
803                                 /*
804                                    I find it odd that this is
805                                    "attribute char" instead of
806                                    "token". RFC 2231, section 7,
807                                    "extended-other-values"
808                                  */
809                                 continue
810                         else
811                                 std.sbfmt(err, "illegal byte in extended parameter")
812                                 break
813                         ;;
814                 | `Reading_quoted_value:
815                         if c == '"'
816                                 var klower : byte[:] = str_to_lower(attr)
817                                 if std.hthas(params, klower)
818                                         std.sbfmt(err, "duplicate attribute “{}”", klower)
819                                         std.slfree(klower)
820                                         break
821                                 ;;
822                                 std.htput(params, klower, std.sbfin(quoted_buf))
823                                 quoted_buf = std.mksb()
824                                 if is_extended
825                                         ensure_in(&keys_needing_decoding, attr)
826                                 ;;
827                                 if is_sectioned
828                                         var q : byte[:] = [][:]
829                                         if is_extended
830                                                 q = std.fmt("{}*", attr_sans_asterisk)
831                                         else
832                                                 q = std.sldup(attr_sans_asterisk)
833                                         ;;
834                                         ensure_in(&keys_with_continuations, q)
835                                         std.slfree(q)
836                                 ;;
837                                 state = `Finished_a_param
838                         elif c == '\\'
839                                 if j + 1 >= raw.len
840                                         std.sbfmt(err, "quoted pair ends abruptly")
841                                         break
842                                 ;;
843                                 std.sbputb(quoted_buf, raw[j+1])
844                                 j++
845                         else
846                                 std.sbputb(quoted_buf, raw[j])
847                         ;;
848                 | `Finished_a_param:
849                         if c != ';'
850                                 std.sbfmt(err, "expected ‘;’ after parameter")
851                                 break
852                         ;;
854                         /* Reset everything */
855                         state = `Just_saw_semicolon
856                         is_sectioned = false
857                         is_initial_section = false
858                         is_extended = false
859                         attr_start = 0
860                         attr = [][:]
861                         attr_sans_asterisk = [][:]
862                         section_start = 0
863                         value_start = 0
864                         std.sbtrim(quoted_buf, 0)
865                 ;;
866         ;;
868         if err.len > 0
869                 goto done
870         ;;
872         /*
873            We now need to decode and join things a bit carefully.
875            First, because params don't follow any order and only
876            the *0 section carries decoding information, we needed
877            to store them all before decoding any, and we want to
878            join before decoding.
880            (This does not contradict the remarks of RFC 2231, section
881            4, because concatenating quoted strings and encoded
882            strings will produce a result that decodes correctly.)
884            Second, since the "*N" comes before the "*" in the param
885            name, we have to be a bit awkward about joining.
886          */
887         for k : keys_with_continuations
888                 var ksa : byte[:] = k
889                 is_extended = false
890                 if ksa[ksa.len - 1] == ('*' : byte)
891                         ksa = ksa[:ksa.len - 1]
892                         is_extended = true
893                 ;;
895                 var n : int = 0
896                 var sb : std.strbuf# = std.mksb()
897                 while true
898                         var k2 : byte[:] = [][:]
899                         if is_extended
900                                 k2 = std.fmt("{}*{}*", ksa, n)
901                         else
902                                 k2 = std.fmt("{}*{}", ksa, n)
903                         ;;
905                         match std.htget(params, k2)
906                         | `std.None:
907                                 if std.hthas(params, k)
908                                         std.sbfmt(err, "duplicate attribute “{}”", k)
909                                         goto done
910                                 ;;
911                                 std.htput(params, k, std.sbfin(sb))
912                                 break
913                         | `std.Some s:
914                                 std.sbfmt(sb, "{}", s)
915                                 
916                         ;;
917                 ;;
918         ;;
919         std.slfree(keys_with_continuations)
920         keys_with_continuations = [][:]
922         /* Now we've joined everything, so we can decode it */
923         for k : keys_needing_decoding
924                 if k.len < 2 || k[k.len - 1] != ('*' : byte)
925                         /* Impossible */
926                         continue
927                 ;;
929                 /* TODO: handle more than utf-8 here */
930                 var val : byte[:] = [][:]
931                 match std.htget(params, k)
932                 | `std.None: continue
933                 | `std.Some s:
934                         match utf8_from_octet(s)
935                         | `std.Ok u: val = u
936                         | `std.Err void:
937                                 std.sbfmt(err, "invalid utf-8 “{}”", s)
938                                 goto done
939                         ;;
940                 ;;
942                 var ksa : byte[:] = str_to_lower(k[:k.len - 1])
943                 if std.hthas(params, ksa)
944                         std.sbfmt(err, "duplicate attribute “{}”", ksa)
945                         goto done
946                 ;;
947                 std.htput(params, ksa, val)
948         ;;
949         std.slfree(keys_needing_decoding)
950         keys_needing_decoding = [][:]
952         /* Now we've decoded everything, so we can remove all the intermediate keys */
953         for (k, v) : std.byhtkeyvals(params)
954                 match std.strfind(k, "*")
955                 | `std.None:
956                 | `std.Some _:
957                         std.htdel(params, k)
958                         std.slfree(k)
959                         std.slfree(v)
960                 ;;
961         ;;
963 :done
964         std.slfree(keys_with_continuations)
965         std.slfree(keys_needing_decoding)
966         std.sbfree(quoted_buf)
968         /* TODO: remove the slfill. It's just salting the earth to make sure I sldup()d things right */
969         std.slfill(raw, ('Z' : byte))
970         std.slfree(raw)
972         match err.len
973         | 0:
974                 std.sbfree(err)
975                 -> `std.Ok params
976         | _:
977                 free_all_params(params)
978                 std.htfree(params)
979                 -> `std.Err std.sbfin(err)
980         ;;
/*
   Make sure list# holds the result of str_to_lower(value). If an
   equal entry is already present the freshly made copy is released;
   otherwise the copy is pushed and from then on belongs to the list.
 */
const ensure_in = {list : byte[:][:]#, value : byte[:]
        var lowered : byte[:] = str_to_lower(value)
        var already_present : bool = false

        for existing : list#
                if std.eq(existing, lowered)
                        already_present = true
                        break
                ;;
        ;;

        if already_present
                /* Duplicate: the list keeps its own entry, drop ours */
                std.slfree(lowered)
        else
                std.slpush(list, lowered)
        ;;
}
/*
   True if b is acceptable where RFC 2231, section 7 calls for an
   "attribute-char". The tspecials excluded below are those listed
   in RFC 2045, section 5.1.

   NOTE(review): RFC 2231 also bars "*" and "%" from attribute-char.
   They are not rejected here; the extended-value scanner tests for
   '%' itself before consulting this function. Confirm whether any
   caller relies on "*" being accepted before tightening this.
 */
const is_attribute_char = {b : byte
        /* CTRL, SPACE, and non-US-ASCII */
        if b <= 0x20 || b > 0x7e
                -> false
        ;;

        /* '"' is a tspecial; it only ever delimits a quoted-string */
        if b == 0x22
                -> false
        ;;

        /* ":" to "@" */
        if b >= 0x3a && b <= 0x40
                -> false
        ;;

        /* "[" to "]" */
        if b >= 0x5b && b <= 0x5d
                -> false
        ;;

        /* "'" to ")" */
        if b >= 0x27 && b <= 0x29
                -> false
        ;;

        /* "/" and "," */
        if b == 0x2f || b == 0x2c
                -> false
        ;;

        -> true
}
/*
   True if b may appear in a "token" as defined by RFC 2045, section
   5.1: any US-ASCII character except SPACE, CTLs, and the tspecials
   ( ) < > @ , ; : \ " / [ ] ? =
 */
const is_token_char = {b : byte
        /* CTRL, SPACE, and non-US-ASCII */
        if b <= 0x20 || b > 0x7e
                -> false
        ;;

        /* '"' is a tspecial; it only ever delimits a quoted-string */
        if b == 0x22
                -> false
        ;;

        /* ":" to "@" */
        if b >= 0x3a && b <= 0x40
                -> false
        ;;

        /* "[" to "]" */
        if b >= 0x5b && b <= 0x5d
                -> false
        ;;

        /* "(" or ")" or "/" or "," */
        if b == 0x28 || b == 0x29 || b == 0x2f || b == 0x2c
                -> false
        ;;

        -> true
}
/*
   True if b is one of the hex digits accepted inside a "%XX"
   escape: 0 through 9, or upper-case A through F.
 */
const is_octet_char = {b : byte
        var is_digit : bool = b >= 0x30 && b <= 0x39
        var is_upper_hex : bool = b >= 0x41 && b <= 0x46

        -> is_digit || is_upper_hex
}
/*
   Decode the "%XX" escapes in s (upper-case hex digits only) and
   return the resulting bytes as a newly allocated slice. Bytes that
   are not part of an escape are copied through unchanged.

   Yields `std.Err void when an escape is cut short, when an escape
   holds a byte other than 0-9 or A-F, or when util.non_ctrl_utf8()
   rejects the decoded bytes. Nothing is handed back on failure, so
   the scratch buffer is released on every error path.
 */
const utf8_from_octet = {s : byte[:]
        var sb : std.strbuf# = std.mksb()
        for var j = 0; j < s.len; ++j
                match (s[j] : char)
                | '%':
                        var b : byte = 0

                        /* Two more bytes must follow the '%' */
                        if j + 2 >= s.len
                                std.sbfree(sb)
                                -> `std.Err void
                        ;;

                        var b1 : byte = s[j + 1]
                        var b2 : byte = s[j + 2]

                        /* High nibble */
                        if b1 >= ('0' : byte) && b1 <= ('9' : byte)
                                b += (b1 - ('0' : byte)) * 0x10
                        elif b1 >= ('A' : byte) && b1 <= ('F' : byte)
                                b += (b1 - ('A' : byte) + 0x0a) * 0x10
                        else
                                std.sbfree(sb)
                                -> `std.Err void
                        ;;

                        /* Low nibble */
                        if b2 >= ('0' : byte) && b2 <= ('9' : byte)
                                b += (b2 - ('0' : byte))
                        elif b2 >= ('A' : byte) && b2 <= ('F' : byte)
                                b += (b2 - ('A' : byte) + 0x0a)
                        else
                                std.sbfree(sb)
                                -> `std.Err void
                        ;;
                        std.sbputb(sb, b)

                        /* Step over the two hex digits just consumed */
                        j = j + 2
                | _: std.sbputb(sb, s[j])
                ;;
        ;;

        if !util.non_ctrl_utf8(std.sbpeek(sb))
                std.sbfree(sb)
                -> `std.Err void
        ;;

        -> `std.Ok std.sbfin(sb)
}
/*
   Release what the message behind m owns: first the separately
   allocated parts of its top-level entity, then the raw buffer.
   m itself is not freed here.
 */
const free_message = {m : message#
        var top : entity = m.contents
        free_entity(top)

        std.slfree(m.raw)
}
/*
   Release the separately allocated parts of an entity: every header
   key and value, the header table itself, the name (if any), and,
   for a multipart body, each sub-entity followed by the slice that
   holds them. e.raw is left alone; it belongs to the message
   containing this entity.
 */
const free_entity = {e : entity
        /* Each value string, the per-key value list, then the key */
        for (hdr, vals) : std.byhtkeyvals(e.headers)
                for val : vals
                        std.slfree(val)
                ;;
                std.slfree(vals)
                std.slfree(hdr)
        ;;
        std.htfree(e.headers)

        /* The name was sldup()d, so it has to be freed here */
        match e.name
        | `std.None:
        | `std.Some nm:
                std.slfree(nm)
                /* e was passed by value; this clears the local copy */
                e.name = `std.None
        ;;

        /* Only multipart bodies own anything beyond raw */
        match e.body
        | `Single(_): /* Nothing to free, it is a subset of raw */
        | `Multipart(parts):
                for part : parts
                        free_entity(part)
                ;;
                std.slfree(parts)
        ;;
}