From a41dc214e4f72d00b0dd8c1381089328df252a1e Mon Sep 17 00:00:00 2001 From: Mauro Iazzi Date: Tue, 19 May 2009 18:16:52 +0200 Subject: [PATCH] fixed some captures fixed segfault in Csg by swapping captures dot-atom is a substitution-capture phrase is a substitution-capture domain-literal is a substitution-capture added a few tests --- mime.lua | 12 ++++++------ test.lua | 10 ++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/mime.lua b/mime.lua index d00bc47..6aa9e5c 100644 --- a/mime.lua +++ b/mime.lua @@ -41,11 +41,11 @@ local lpeg = lpeg local print = print -- this creates a named and substitution capture for the given pattern -lpeg.Csg = function (patt) - return #lpeg.Cs(patt) * lpeg.Ct(patt) +lpeg.Csg = function (patt, name) + return (#lpeg.Cg(patt, name)) * lpeg.Cs(patt) end -- this collects both the table of named captures and the --- substitution captureof the given pattern +-- substitution capture of the given pattern lpeg.Cst = function (patt) return (#lpeg.Cs(patt) * lpeg.Ct(patt)) / function (s, t) local ret = {} @@ -207,7 +207,7 @@ local lex_tokens = function () -- an atom is equal to the content only discarding comments and whitespace ["atom"] = V("CFWS")^-1 * C(V("atext")^1) * V("CFWS")^-1, -- RFC 2822 Section 3.2.4 -- an atom with dots is only the content, discarding CFWSs - ["dot-atom"] = Cs((V("CFWS")^-1/'') * C(V("dot-atom-text")) * (V("CFWS")^-1/'')), -- RFC 2822 Section 3.2.4 + ["dot-atom"] = V("CFWS")^-1 * C(V("dot-atom-text")) * V("CFWS")^-1, -- RFC 2822 Section 3.2.4 -- the content of an atom text with dots ["dot-atom-text"] = V("atext")^1 * (P(".") * V("atext")^1)^0, -- RFC 2822 Section 3.2.4 -- character that can appear in a quoted string @@ -228,7 +228,7 @@ local lex_tokens = function () -- an generic word ["word"] = V("atom") + V("quoted-string"), -- RFC 2822 Section 3.2.6 -- an generic phrase - ["phrase"] = V("word")^1 + V("obs-phrase"), -- RFC 2822 Section 3.2.6 + ["phrase"] = Cs(V("word")^1) + V("obs-phrase"), -- RFC 2822 Section 3.2.6 -- a character for unstructured text ["utext"] = V("NO-WS-CTL") + R("\33\126") + V("obs-utext"), -- RFC 2822 Section 3.2.6 -- an unstructured text @@ -280,7 +280,7 @@ local address = function () ["addr-spec"] = Cst(V"local-part" * P"@" * V"domain"), -- RFC 2822 Section 3.4.1 ["local-part"] = Csg(V"dot-atom" + V"quoted-string" + V"obs-local-part", "box"), -- RFC 2822 Section 3.4.1 ["domain"] = Cg(V"dot-atom" + V"domain-literal" + V"obs-domain", "domain"), -- RFC 2822 Section 3.4.1 - ["domain-literal"] = V"CFWS"^-1 * P"[" * (V"FWS"^-1 * V"dcontent")^0 * V"FWS"^-1 * P"]" * V"CFWS"^-1, -- RFC 2822 Section 3.4.1 + ["domain-literal"] = Cs(V"CFWS"^-1 * P"[" * (V"FWS"^-1 * V"dcontent")^0 * V"FWS"^-1 * P"]" * V"CFWS"^-1), -- RFC 2822 Section 3.4.1 ["dcontent"] = V"dtext" + V"quoted-pair", -- RFC 2822 Section 3.4.1 ["dtext"] = V"NO-WS-CTL" + R"\33\90" + R"\94\126", -- RFC 2822 Section 3.4.1 } diff --git a/test.lua b/test.lua index 8ca588a..0010da6 100644 --- a/test.lua +++ b/test.lua @@ -46,9 +46,10 @@ local tests = { { string = '2nd{atom}2.&checking!', rule ='dot-atom', match = true, capture = '2nd{atom}2.&checking!'}, { string = '\r\n (another comment ) \t\r\n (id %(crmnr) \\( ** \r\n ) ', rule ='CFWS', match = true, capture = ' '}, { string = '()', rule ='CFWS', match = true, capture = ' '}, - { string = 'Icanwrite"a \r\n simple"phrase', rule ='phrase', match = true}, + { string = 'Icanwrite"a \r\n simple"phrase', rule ='phrase', match = true, capture='Icanwrite"a simple"phrase'}, { string = 'Thu, 2\r\n Apr 2009 14:36:04 +0000', rule ='date-time', match = true, capture={ 'Thu, 2 Apr 2009 14:36:04 +0000', weekday='Thu', year='2009', month='Apr', day='2', zone='+0000', minute='36', hour='14', second='04' }}, - { string = '(a CFWS) \r\n (that ends here)[ 127.0.0.1:8888 oooo]', rule ='domain-literal', match = true, }, + { string = '(a CFWS) \r\n (that ends here)[ 127.0.0.1:8888 \r\n oooo]', rule ='domain-literal', match = true, capture=' [ 127.0.0.1:8888 oooo]'}, + { string = 'mauro.iazzi@gmail.com', rule ='addr-spec', match = true, capture={ [1]='mauro.iazzi@gmail.com', box='mauro.iazzi', domain='gmail.com' }, }, { string = 'mauro.iazzi@gmail.com', rule ='address', match = true, capture={ [1]='mauro.iazzi@gmail.com', box='mauro.iazzi', domain='gmail.com' }, }, { string = '"Mauro Iazzi" <"mauro\\2\r\n .iazzi"@gmail.com>', rule ='address', match = true, capture={ [1]='"Mauro Iazzi" <"mauro2.iazzi"@gmail.com>', box='mauro.iazzi', domain='gmail.com' }, }, { string = '"Mauro \r\n Iazzi" (a comment?) ', rule ='address', match = true, capture={ [1]='"Mauro Iazzi" ', box='mauro.iazzi', domain='gmail.com' }, }, @@ -57,7 +58,7 @@ local tests = { } local dee = function (...) - --print(...) + print(...) return ... end @@ -67,7 +68,7 @@ local gather = function (...) end local function equal (a, b) - --print('', a, b) + dee('', a, b) if a==b then return true end if type(a)~=type(b) then return false end if type(a)=='table' then @@ -104,6 +105,7 @@ end print("starting the tests ...") for i, t in ipairs(tests) do io.write('performing test ' .. i .. ' on rule ' .. tostring(t.rule)) + io.flush() local st, err = pcall(do_test, i, t) if st then err = 'success' end print('', err) -- 2.11.4.GIT