5 -- MIME implementation in Lua
12 local tostring = tostring
14 local setfenv
= setfenv
--- Concatenate the string form of every argument, in call order.
-- Each argument goes through tostring, so nil contributes "nil" and
-- numbers/booleans contribute their printed form.
-- @param ... any number of values
-- @return the concatenated string ("" when called without arguments)
local join = function (...)
  local out = ''
  local idx, count = 1, select('#', ...)
  while idx <= count do
    -- the extra parentheses keep only the idx-th argument
    out = out .. tostring((select(idx, ...)))
    idx = idx + 1
  end
  return out
end
26 -- rfc2822 Section 2.1:
28 -- A message consists of header fields (collectively called "the header
29 -- of the message") followed, optionally, by a body. The header is a
30 -- sequence of lines of characters with special syntax as defined in
31 -- this standard. The body is simply a sequence of characters that
32 -- follows the header and is separated from the header by an empty line
33 -- (i.e., a line with nothing preceding the CRLF).
--- Split a raw message into its header section and its body.
-- The cut is made at the first empty line, i.e. the first CRLF CRLF pair
-- (the header capture is lazy); this relies on no header field containing
-- a blank line of its own.
-- @param c the complete message text
-- @return the header text (without the separating empty line)
-- @return the body text (everything after the separator, possibly "")
-- Raises an assertion failure when no CRLF CRLF separator is present.
local split_content = function (c)
  local separator = CRLF .. CRLF
  local header, body = string.match(c, '^(.-)' .. separator .. '(.*)$')
  local both_found = type(header) == 'string' and type(body) == 'string'
  assert(both_found)
  return header, body
end
44 -- rfc2822 Section 2.2.3
46 -- The process of moving from this folded multiple-line representation
47 -- of a header field to its single line representation is called
48 -- "unfolding". Unfolding is accomplished by simply removing any CRLF
49 -- that is immediately followed by WSP. Each header field should be
50 -- treated in its unfolded form for further syntactic and semantic
--- Unfold a header: turn its folded multi-line form into a single line.
-- Every CRLF that is immediately followed by a space or a tab is replaced,
-- together with that whitespace character, by one space.
-- @param h the folded header text
-- @return the unfolded header text (exactly one value)
local unfold_header = function (h)
  -- string.gsub returns the new string AND the number of substitutions;
  -- the parentheses drop the count so it cannot leak into a caller's
  -- argument list or table constructor.
  return (string.gsub(h, CRLF .. '[ \t]', ' '))
end
57 -- rfc2822 Section 2.1
59 -- At the most basic level, a message is a series of characters. A
60 -- message that is conformant with this standard is comprised of
61 -- characters with values in the range 1 through 127 and interpreted as
62 -- US-ASCII characters [ASCII]. For brevity, this document sometimes
63 -- refers to this range of characters as simply "US-ASCII characters".
66 -- rfc2822 Section 2.1.1
68 -- There are two limits that this standard places on the number of
69 -- characters in a line. Each line of characters MUST be no more than
70 -- 998 characters, and SHOULD be no more than 78 characters, excluding
-- NOTE(review): only a TODO is present in the body. Presumably meant to
-- enforce the RFC 2822 Section 2.1 / 2.1.1 limits quoted above (US-ASCII
-- range, line length) on the whole message `c`; confirm before relying on it.
73 local check_content
= function (c
)
75 -- TODO: implement in a portable way
79 -- rfc2822 Section 2.3
81 -- The body of a message is simply lines of US-ASCII characters. The
82 -- only two limitations on the body are as follows:
84 -- - CR and LF MUST only occur together as CRLF; they MUST NOT appear
85 -- independently in the body.
87 -- - Lines of characters in the body MUST be limited to 998 characters,
89 -- and SHOULD be limited to 78 characters, excluding the CRLF.
-- NOTE(review): presumably validates the message body `b` against the
-- RFC 2822 Section 2.3 rules quoted above (CR and LF only as CRLF, line
-- length limits); confirm against the implementation.
91 local check_body
= function (b
)
97 -- some core values (not implemented literally)
98 -- see RFC 2234 Section 6.1
100 ["ALPHA"] = lpeg
.R("AZ", "az"),
101 ["BIT"] = lpeg
.P("0") + lpeg
.P("1"),
102 ["CHAR"] = lpeg
.R("\01\127"),
103 ["CR"] = lpeg
.P("\13"),
104 ["CRLF"] = lpeg
.P("\13\10"),
105 ["DIGIT"] = lpeg
.R("09"),
106 ["LF"] = lpeg
.P("\10"),
107 ["WSP"] = lpeg
.S("\32\09"),
110 -- lexical tokens used in the specification
111 -- TODO: could be optimized
112 -- check for non-obfuscated optimizations
113 -- TODO: write in a less obfuscated way
114 -- check if parentheses can be removed
-- Lexical-token rules of RFC 2822 Section 3.2, keyed by rule name.
-- Rules refer to one another, and to the core and obs-* rules, through
-- V("name"), so they are meant to be merged with those rule sets.
115 local lex_tokens
= function ()
118 -- control characters without whitespaces
119 ["NO-WS-CTL"] = R("\1\8") +
123 P("\127"), -- RFC 2822 Section 3.2.1
124 -- a character in a text
125 ["text"] = R("\1\9") +
128 R("\14\127"), -- RFC 2822 Section 3.2.1
129 -- a special character
130 ["specials"] = P("(") + P(")") +
136 P("\""), -- RFC 2822 Section 3.2.1
137 -- a quoted pair should return only the second character
138 ["quoted-pair"] = (P("\\") * C(V("text"))) + V("obs-qp"), -- RFC 2822 Section 3.2.2
139 -- a folding white space (a whitespace that can include a CRLF)
140 -- should be substituted by a single whitespace
141 ["FWS"] = (((V("WSP")^
0 * V("CRLF"))^
-1 * V("WSP")^
1) + V("obs-FWS")) / " ", -- RFC 2822 Section 3.2.3
142 -- a text character allowed inside a comment
143 ["ctext"] = V("NO-WS-CTL") +
146 R("\93\126"), -- RFC 2822 Section 3.2.3
147 -- the content of a comment (comments can nest)
148 ["ccontent"] = V("ctext") + V("quoted-pair") + V("comment"), -- RFC 2822 Section 3.2.3
150 -- should be substituted by a single whitespace
151 ["comment"] = (P("(") * (V("FWS")^
-1 * V("ccontent"))^
0 * V("FWS")^
-1 * P(")")) / " ", -- RFC 2822 Section 3.2.3
152 -- a comment or a folding white space
153 -- should be substituted by a single whitespace
155 -- Folding white spaces should not be placed in a way that
156 -- creates lines containing only whitespaces.
157 -- This requirement is not necessarily enforced by this grammar
158 ["CFWS"] = ( (V("FWS") + (V("comment")*V("FWS")^
-1)) * (V("comment")*V("FWS")^
-1)^
0 ) / " ", -- RFC 2822 Section 3.2.3
159 -- character that can appear in an atom
160 ["atext"] = V("ALPHA") + V("DIGIT") +
170 P("~"), -- RFC 2822 Section 3.2.4
171 -- an atom is equal to the content only discarding comments and whitespace
172 ["atom"] = V("CFWS")^
-1 * C(V("atext")^
1) * V("CFWS")^
-1, -- RFC 2822 Section 3.2.4
173 -- an atom with dots is only the content
174 ["dot-atom"] = V("CFWS")^
-1 * C(V("dot-atom-text")) * V("CFWS")^
-1, -- RFC 2822 Section 3.2.4
175 -- the content of an atom text with dots
176 ["dot-atom-text"] = V("atext")^
1 * (P(".") * V("atext")^
1)^
0, -- RFC 2822 Section 3.2.4
177 -- character that can appear in a quoted string
178 ["qtext"] = V("NO-WS-CTL") +
181 R("\93\126"), -- RFC 2822 Section 3.2.5
182 -- character or quoted pair (both can appear in a quoted string)
183 -- it is equivalent to the character itself or to the result of the
185 ["qcontent"] = C(V("qtext")) + V("quoted-pair"), -- RFC 2822 Section 3.2.5
186 -- a quoted string is equal to its content
-- the values captured between the quotes are passed to join, which
-- concatenates them into one string
187 ["quoted-string"] = V("CFWS")^
-1 *
188 P("\"") * ((V("FWS")^
-1 * V("qcontent"))^
0 * V("FWS")^
-1)/join
* P("\"") * V("CFWS")^
-1, -- RFC 2822 Section 3.2.5
189 -- unstructured patterns for unspecified headers
190 -- what should these be equal to?
193 ["word"] = V("atom") + V("quoted-string"), -- RFC 2822 Section 3.2.6
195 ["phrase"] = V("word")^
1 + V("obs-phrase"), -- RFC 2822 Section 3.2.6
196 -- a character for unstructured text
197 ["utext"] = V("NO-WS-CTL") + R("\33\126") + V("obs-utext"), -- RFC 2822 Section 3.2.6
198 -- an unstructured text
199 ["unstructured"] = (V("FWS")^
-1 * V("utext"))^
0 * V("FWS")^
-1, -- RFC 2822 Section 3.2.6
-- Date and time rules of RFC 2822 Section 3.3, keyed by rule name.
-- `format` falls back to `join` when the caller passes nil or false.
203 local date_time
= function (format)
204 format = format or join
207 -- date and time specification
208 -- dates and times should be valid
209 -- this grammar does not enforce this yet
210 ["date-time"] = Cs(( V
"day-of-week" * C(P
",") )^
-1 * V
"date" * V
"FWS" * V
"time" * V
"CFWS"^
-1), -- RFC 2822 Section 3.3
211 ["day-of-week"] = ( V
"FWS"^
-1 * V
"day-name" ) + V
"obs-day-of-week", -- RFC 2822 Section 3.3
212 ["day-name"] = P
"Mon" + P
"Tue" + P
"Wed" + P
"Thu" + P
"Fri" + P
"Sat" + P
"Sun", -- RFC 2822 Section 3.3
213 ["date"] = V
"day" * V
"month" * V
"year", -- RFC 2822 Section 3.3
-- assuming V is lpeg.V: patt^4 means "four or more" repetitions, which
-- matches the RFC's 4*DIGIT for the year
214 ["year"] = C(V
"DIGIT"^
4) + V
"obs-year", -- RFC 2822 Section 3.3
215 ["month"] = (V
"FWS" * V
"month-name" * V
"FWS") + V
"obs-month", -- RFC 2822 Section 3.3
216 ["month-name"] = P
"Jan" + P
"Feb" + P
"Mar" + P
"Apr" + P
"May" + P
"Jun" + P
"Jul" + P
"Aug" + P
"Sep" + P
"Oct" + P
"Nov" + P
"Dec", -- RFC 2822 Section 3.3
217 ["day"] = (V
"FWS"^
-1 * C(V
"DIGIT" * V
"DIGIT"^
-1)) + V
"obs-day", -- RFC 2822 Section 3.3
218 ["time"] = V
"time-of-day" * V
"FWS" * V
"zone", -- RFC 2822 Section 3.3
219 ["time-of-day"] = V
"hour" * P
":" * V
"minute" * (P
":" * V
"second")^
-1, -- RFC 2822 Section 3.3
220 ["hour"] = (V
"DIGIT" * V
"DIGIT") + V
"obs-hour", -- RFC 2822 Section 3.3
221 ["minute"] = (V
"DIGIT" * V
"DIGIT") + V
"obs-minute", -- RFC 2822 Section 3.3
222 ["second"] = (V
"DIGIT" * V
"DIGIT") + V
"obs-second", -- RFC 2822 Section 3.3
-- NOTE(review): the fallback alternative below is "obs-second", but RFC 2822
-- Section 3.3 defines zone = (("+" / "-") 4DIGIT) / obs-zone; this looks like
-- a copy-paste slip from the "second" rule. Switching it to "obs-zone" also
-- requires an "obs-zone" entry in the grammar the rules are merged into.
223 ["zone"] = ( (P
"+" + P
"-") * V
"DIGIT" * V
"DIGIT" * V
"DIGIT" * V
"DIGIT") + V
"obs-second", -- RFC 2822 Section 3.3
-- Address rules of RFC 2822 Sections 3.4 and 3.4.1, keyed by rule name.
-- `format` falls back to `join` when the caller passes nil or false.
227 local address
= function (format)
228 format = format or join
231 -- address specification
232 -- addresses should be valid
233 -- this grammar does not enforce this yet
234 ["address"] = V
"mailbox" + V
"group", -- RFC 2822 Section 3.4
235 ["mailbox"] = V
"name-addr" + V
"addr-spec", -- RFC 2822 Section 3.4
236 ["name-addr"] = V
"display-name"^
-1 * V
"angle-addr", -- RFC 2822 Section 3.4
237 ["angle-addr"] = (V
"CFWS"^
-1 * P
"<" * V
"addr-spec" * P
">" * V
"CFWS"^
-1) + V
"obs-angle-addr", -- RFC 2822 Section 3.4
-- NOTE(review): RFC 2822 Section 3.4 makes the group content optional
-- (display-name ":" [mailbox-list / CFWS] ";" [CFWS]); as written here a
-- mailbox-list or CFWS is required, so an empty group such as
-- "undisclosed-recipients:;" would not match. Confirm whether intended.
238 ["group"] = V
"display-name" * P
":" * (V
"mailbox-list" + V
"CFWS") * P
";" * V
"CFWS"^
-1, -- RFC 2822 Section 3.4
239 ["display-name"] = V
"phrase", -- RFC 2822 Section 3.4
240 ["mailbox-list"] = (V
"mailbox" * (P
"," * V
"mailbox")^
0) + V
"obs-mailbox-list", -- RFC 2822 Section 3.4
241 ["address-list"] = (V
"address" * (P
"," * V
"address")^
0) + V
"obs-address-list", -- RFC 2822 Section 3.4
242 -- address specification (name@host.domain)
243 ["addr-spec"] = V
"local-part" * P
"@" * V
"domain", -- RFC 2822 Section 3.4.1
244 ["local-part"] = V
"dot-atom" + V
"quoted-string" + V
"obs-local-part", -- RFC 2822 Section 3.4.1
245 ["domain"] = V
"dot-atom" + V
"domain-literal" + V
"obs-domain", -- RFC 2822 Section 3.4.1
246 ["domain-literal"] = V
"CFWS"^
-1 * P
"[" * (V
"FWS"^
-1 * V
"dcontent")^
0 * V
"FWS"^
-1 * P
"]" * V
"CFWS"^
-1, -- RFC 2822 Section 3.4.1
247 ["dcontent"] = V
"dtext" + V
"quoted-pair", -- RFC 2822 Section 3.4.1
248 ["dtext"] = V
"NO-WS-CTL" + R
"\33\90" + R
"\94\126", -- RFC 2822 Section 3.4.1
253 ["obs-FWS"] = lpeg
.P(false),
254 ["obs-qp"] = lpeg
.P(false),
255 ["obs-phrase"] = lpeg
.P(false),
256 ["obs-utext"] = lpeg
.P(false),
257 ["obs-day-of-week"] = lpeg
.P(false),
258 ["obs-year"] = lpeg
.P(false),
259 ["obs-month"] = lpeg
.P(false),
260 ["obs-day"] = lpeg
.P(false),
261 ["obs-hour"] = lpeg
.P(false),
262 ["obs-minute"] = lpeg
.P(false),
263 ["obs-second"] = lpeg
.P(false),
264 ["obs-angle-addr"] = lpeg
.P(false),
265 ["obs-mailbox-list"] = lpeg
.P(false),
266 ["obs-address-list"] = lpeg
.P(false),
267 ["obs-local-part"] = lpeg
.P(false),
268 ["obs-domain"] = lpeg
.P(false),
-- Empty table whose every missing key resolves to a fresh lpeg.P(false),
-- i.e. a pattern that never matches.
-- NOTE: this local shadows the standard `os` library for the rest of the
-- file; the name is kept because later code may refer to it.
local os = setmetatable({}, {
  __index = function ()
    return lpeg.P(false)
  end,
})
-- Variadic helper that dispatches on the type of each argument: tables are
-- traversed with pairs(), strings and booleans have dedicated branches, and
-- any other type raises an error naming the argument position and its type.
273 local join_set
= function (...)
-- select('#', ...) counts every argument, including nil ones
274 local n
= select('#', ...)
-- assigning to a single local keeps only the i-th argument
277 local t
= select(i
, ...)
278 if type(t
)=='table' then
279 for k
, v
in pairs(t
) do
282 elseif type(t
)=='string' then
284 elseif type(t
)=='boolean' then
285 -- TODO: check no overwrite
287 error('join_set: bad argument number '..i
..' of type '..type(t
))
-- Combined rule table: core values, lexical tokens, date-time rules, address
-- rules and the strict obs-* rules, merged by join_set in that order.
local gr
do
  -- each constructor is called with no arguments, so `format` takes its default
  local lexical_rules = lex_tokens()
  local date_rules = date_time()
  local address_rules = address()
  gr = join_set(core_values, lexical_rules, date_rules, address_rules, obs_strict)
end