5 # Copyright (c) 1998-2003 Minero Aoki <aamine@loveruby.net>
7 # Permission is hereby granted, free of charge, to any person obtaining
8 # a copy of this software and associated documentation files (the
9 # "Software"), to deal in the Software without restriction, including
10 # without limitation the rights to use, copy, modify, merge, publish,
11 # distribute, sublicense, and/or sell copies of the Software, and to
12 # permit persons to whom the Software is furnished to do so, subject to
13 # the following conditions:
15 # The above copyright notice and this permission notice shall be
16 # included in all copies or substantial portions of the Software.
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 # Note: Originally licensed under LGPL v2+. Using MIT license for Rails
27 # with permission of Minero Aoki.
30 require 'tmail/config'
# Building blocks for the scanner's regular expressions.
# `alnum` is defined outside this excerpt.

# Punctuation allowed in addition to `alnum` when matching an atom.
47 atomsyms = %q[ _#!$%&`'*+-{|}~^@/=? ].strip
# Punctuation allowed when matching a token: the atom set without "/", "="
# and "?", plus ".".
48 tokensyms = %q[ _#!$%&`'*+-{|}~^@. ].strip
# Regexp.quote escapes the punctuation so the result can be interpolated
# inside a character class ([...]) further down.
50 atomchars = alnum + Regexp.quote(atomsyms)
51 tokenchars = alnum + Regexp.quote(tokensyms)
# An escape-delimited segment: ESC (not followed by "(B") plus two characters,
# then any mix of non-ESC text and further such escape sequences, closed by
# ESC "(B". Going by the name, this is meant to cover ISO-2022 encoded text.
52 iso2022str = '\e(?!\(B)..(?:[^\e]+|\e(?!\(B)..)*\e\(B'
# One or more byte pairs where both bytes are in 0xA1-0xFE.
54 eucstr = '(?:[\xa1-\xfe][\xa1-\xfe])+'
# One or more byte pairs: lead byte 0x81-0x9F or 0xE0-0xEF, trail byte
# 0x40-0x7E or 0x80-0xFC.
55 sjisstr = '(?:[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc])+'
# One or more two-byte (lead 0xC0-0xDF) or three-byte (lead 0xE0-0xEF)
# sequences with 0x80-0xBF continuation bytes. Four-byte sequences are not
# matched by this pattern.
56 utf8str = '(?:[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf])+'
# Content patterns for quoted words, domain literals and comments. Each matches,
# anchored at the start of the input, a run of characters that are neither a
# backslash nor the construct's own delimiter(s). The *_with_iso2022 variants
# additionally stop plain runs at ESC and accept whole iso2022str segments.
58 quoted_with_iso2022 = /\A(?:[^\\\e"]+|#{iso2022str})+/n
59 domlit_with_iso2022 = /\A(?:[^\\\e\]]+|#{iso2022str})+/n
60 comment_with_iso2022 = /\A(?:[^\\\e()]+|#{iso2022str})+/n
# Variants without any special handling of ESC.
62 quoted_without_iso2022 = /\A[^\\"]+/n
63 domlit_without_iso2022 = /\A[^\\\]]+/n
64 comment_without_iso2022 = /\A[^\\()]+/n
# Per-encoding pattern sets. initialize looks one up with PATTERN_TABLE[$KCODE]
# and destructures it as: atom, token, quoted, domain-literal, comment.
# NOTE(review): the array delimiters and some entries are not visible in this
# excerpt; the element order is inferred from the UTF8 entry and from the
# destructuring in initialize -- confirm against the full file.

# EUC: atom/token runs may also contain iso2022str segments and EUC byte pairs.
67 PATTERN_TABLE['EUC'] =
69 /\A(?:[#{atomchars}]+|#{iso2022str}|#{eucstr})+/n,
70 /\A(?:[#{tokenchars}]+|#{iso2022str}|#{eucstr})+/n,
# SJIS: same shape as EUC, with Shift_JIS byte pairs instead.
75 PATTERN_TABLE['SJIS'] =
77 /\A(?:[#{atomchars}]+|#{iso2022str}|#{sjisstr})+/n,
78 /\A(?:[#{tokenchars}]+|#{iso2022str}|#{sjisstr})+/n,
# UTF8: atom/token runs may contain utf8str sequences; no iso2022str handling,
# and the quoted/domlit/comment patterns are the *_without_iso2022 variants.
83 PATTERN_TABLE['UTF8'] =
85 /\A(?:[#{atomchars}]+|#{utf8str})+/n,
86 /\A(?:[#{tokenchars}]+|#{utf8str})+/n,
87 quoted_without_iso2022,
88 domlit_without_iso2022,
89 comment_without_iso2022
# NONE: no multibyte alternatives in the visible token pattern. The first entry
# (presumably the atom pattern) is not visible in this excerpt.
91 PATTERN_TABLE['NONE'] =
94 /\A[#{tokenchars}]+/n,
95 quoted_without_iso2022,
96 domlit_without_iso2022,
97 comment_without_iso2022
# Sets up a scanner for one header value.
#
# str      - the text to scan (its use is not visible in this excerpt;
#            presumably handed to init_scanner -- confirm).
# scantype - symbol identifying the header kind; compared against :RECEIVED and
#            used as a key into MIME_HEADERS.
# comments - array that scanned comments are pushed onto; a fresh array is used
#            when nil/false is given.
101 def initialize( str, scantype, comments )
103 @comments = comments || []
# True only for the :RECEIVED scan type.
107 @received = (scantype == :RECEIVED)
108 @is_mime_header = MIME_HEADERS[scantype]
# Pick the pattern set for the current $KCODE value.
# NOTE(review): if PATTERN_TABLE has no entry (and no default) for the current
# $KCODE value, this destructuring leaves all five patterns nil. $KCODE is
# obsolete on Ruby 1.9+ -- confirm how the lookup behaves there.
110 atom, token, @quoted_re, @domlit_re, @comment_re = PATTERN_TABLE[$KCODE]
# Headers listed in MIME_HEADERS scan words with the token pattern; all other
# headers use the atom pattern.
111 @word_re = (MIME_HEADERS[scantype] ? token : atom)
120 printf "%7d %-10s %s\n",
122 s.respond_to?(:id2name) ? s.id2name : s.inspect,
144 if skip(/\A[\n\r\t ]+/n) # LWSP
148 if s = readstr(@word_re)
156 yield RECV_TOKEN[s.downcase] || :ATOM, s
163 yield :QUOTED, scan_quoted_word()
166 yield :DOMLIT, scan_domain_literal()
169 @comments.push scan_comment()
181 scan_qstr(@quoted_re, /\A"/, 'quoted-word')
# Scans the body of a domain literal up to and including the closing "]" and
# returns it wrapped in square brackets. The closing bracket is consumed by
# scan_qstr and re-added here; the opening "[" is presumably consumed by the
# caller before this method runs -- confirm.
184 def scan_domain_literal
185 '[' + scan_qstr(@domlit_re, /\A\]/, 'domain-literal') + ']'
# Shared scanner for delimiter-terminated strings (quoted words and domain
# literals).
#
# pattern  - regexp matching a run of ordinary content characters.
# terminal - regexp matching the closing delimiter.
# type     - human-readable construct name, used only in error messages.
#
# Returns the accumulated content once the terminal has been consumed.
# NOTE(review): the loop header and the initialisation of `result` are not
# visible in this excerpt.
188 def scan_qstr( pattern, terminal, type )
# Ordinary content: append it as-is.
191 if s = readstr(pattern) then result << s
# Closing delimiter: done.
192 elsif skip(terminal) then return result
# Backslash escape: drop the backslash and keep the next character verbatim.
193 elsif skip(/\A\\/) then result << readchar()
# None of the three cases matched -- reported as an internal (FATAL) error.
195 raise "TMail FATAL: not match in #{type}"
# Presumably reached when the input ends before the terminal is seen -- confirm
# against the surrounding loop.
198 scan_error! "found unterminated #{type}"
204 content = @comment_re
207 if s = readstr(content) then result << s
208 elsif skip(/\A\)/) then nest -= 1
209 return result if nest == 0
211 elsif skip(/\A\(/) then nest += 1
213 elsif skip(/\A\\/) then result << readchar()
215 raise 'TMail FATAL: not match in comment'
218 scan_error! 'found unterminated comment'
223 def init_scanner( str )
236 if m = re.match(@src)
249 if m = re.match(@src)
# Reports a scanning failure by raising SyntaxError with the given message.
# NOTE(review): which SyntaxError constant this resolves to (a library-defined
# class or Ruby's built-in ::SyntaxError) depends on the enclosing namespace,
# which is not visible in this excerpt -- confirm.
257 def scan_error!( msg )
258 raise SyntaxError, msg