1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike
3 ;; Copyright (C) 1986, 87, 1990 Free Software Foundation, Inc.
5 ;; Author: Richard Mlynarik <mly@eddie.mit.edu>
9 ;; This file is part of GNU Emacs.
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
27 ;; uses address-start free, throws to address
;; Abort parsing of the current address and report it as unparsable.
;; Works on a region that starts at the free variable `address-start'
;; (which the caller must have bound), brackets that text with
;; "(Unparsable address -- " and a closing "\")", and finally throws the
;; substring from `address-start' to point to the `address' catch tag.
;; REASON is presumably the explanation embedded in the generated text;
;; the line that uses it is not visible in this excerpt -- TODO confirm
;; against the complete file.
28 (defun rfc822-bad-address (reason)
31 (narrow-to-region address-start
32 (if (re-search-forward "[,;]" nil t
)
33 (max (point-min) (1- (point)))
35 ;; make the error string be suitable for inclusion in (...)
36 (let ((losers '("\\" "(" ")" "\n")))
;; Each entry of `losers' is searched for from the start of the
;; narrowed region, then dropped from the list.
38 (goto-char (point-min))
39 (while (search-forward (car losers
) nil t
)
43 (setq losers
(cdr losers
))))
;; Put the wrapper text at the start and at the end of the region.
44 (goto-char (point-min)) (insert "(Unparsable address -- "
47 (goto-char (point-max)) (insert "\")"))
48 (rfc822-nuke-whitespace)
;; Non-local exit: the text from `address-start' to point becomes the
;; value delivered to whoever established the `address' catch.
49 (throw 'address
(buffer-substring address-start
(point))))
;; Clean up whitespace and parenthesised comments at point.
;; Visible behaviour in this excerpt:
;;  - the character after point is read into `ch' and dispatched on;
;;  - one branch deals with "(...)" comments and reports
;;    "Unbalanced comment (...)" or "orphaned backslash" through
;;    `rfc822-bad-address', recursing into itself for nested text;
;;  - another branch, taken when `ch' is a space, tab or newline,
;;    deletes the whole run of " \t\n" characters after point.
;; LEAVE-SPACE is optional; the code that consults it is on lines that
;; are missing from this excerpt, so its exact effect cannot be stated
;; here -- TODO confirm against the complete file.
;; NOTE(review): several character literals below are split across
;; lines by the extraction (e.g. `?\' followed by `(' on the next line).
51 (defun rfc822-nuke-whitespace (&optional leave-space
)
55 ((= (setq ch
(following-char)) ?\
()
58 (rfc822-bad-address "Unbalanced comment (...)")
59 (/= (setq ch
(following-char)) ?\
)))
60 (cond ((looking-at "[^()\\]+")
63 (rfc822-nuke-whitespace))
64 ((< (point) (1- (point-max)))
67 (rfc822-bad-address "orphaned backslash"))))
68 ;; delete remaining "()"
72 ((memq ch
'(?\ ?
\t ?
\n))
;; Remove everything from point up to the end of the whitespace run.
73 (delete-region (point)
74 (progn (skip-chars-forward " \t\n") (point)))
81 (= (preceding-char) ?\
)
;; Test whether the text at point matches REGEX and step over it.
;; REGEX is either a string, tested with `looking-at' (point then moves
;; to the end of the match), or a character, compared with
;; `following-char' (point then moves forward by one).
;; Afterwards `rfc822-nuke-whitespace' is called with LEAVE-SPACE, with
;; the match data saved beforehand and restored afterwards.
;; The lines that determine the return value are missing from this
;; excerpt; callers in this file use the result as a boolean.
84 (defun rfc822-looking-at (regex &optional leave-space
)
85 (if (cond ((stringp regex
)
86 (if (looking-at regex
)
87 (progn (goto-char (match-end 0))
91 (= (following-char) regex
))
92 (progn (forward-char 1)
;; Preserve the match data across the whitespace cleanup, which may
;; itself run regexp matches.
94 (let ((tem (match-data)))
95 (rfc822-nuke-whitespace leave-space
)
96 (store-match-data tem
)
;; Consume one rfc822 `word' at point.
;; When the next character is (apparently) a double quote -- the
;; character literal is split across lines in this excerpt -- a complete
;; quoted string must match, otherwise "Unterminated quoted string" is
;; reported through `rfc822-bad-address'.  Otherwise an atom is matched
;; with `rfc822-looking-at'.  A further visible branch reports
;; "Rubbish in address"; the lines between are missing from this
;; excerpt.
99 (defun rfc822-snarf-word ()
100 ;; word is atom | quoted-string
101 (cond ((= (following-char) ?
\")
103 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
104 (rfc822-bad-address "Unterminated quoted string")))
105 ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
109 (rfc822-bad-address "Rubbish in address"))))
;; Consume a sequence of words separated by "." characters.
;; Visible part: for as long as `rfc822-looking-at' accepts a "." at
;; point, another word is consumed with `rfc822-snarf-word'.
;; NOTE(review): the line between the `defun' and the loop is missing
;; from this excerpt; presumably it consumes the first word -- confirm.
111 (defun rfc822-snarf-words ()
113 (while (rfc822-looking-at ?.
)
114 (rfc822-snarf-word)))
;; Consume one sub-domain at point.
;; When the next character is (apparently) "[" -- the character literal
;; is split across lines in this excerpt -- a complete bracketed domain
;; literal must match, otherwise "Unterminated domain literal [...]" is
;; reported through `rfc822-bad-address'.  Otherwise an atom is matched
;; with `rfc822-looking-at'.  A further visible branch reports
;; "Rubbish in host/domain specification"; the lines between are
;; missing from this excerpt.
116 (defun rfc822-snarf-subdomain ()
117 ;; sub-domain is domain-ref | domain-literal
118 (cond ((= (following-char) ?\
[)
120 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
121 (rfc822-bad-address "Unterminated domain literal [...]")))
122 ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
123 ;; domain-literal = atom
126 (rfc822-bad-address "Rubbish in host/domain specification"))))
;; Consume a domain at point: one sub-domain, then any number of
;; further sub-domains each introduced by a "." character.
;; All the work is delegated to `rfc822-snarf-subdomain' and
;; `rfc822-looking-at'; the value of the final `while' form (nil) is
;; what this function returns.
128 (defun rfc822-snarf-domain ()
129 (rfc822-snarf-subdomain)
130 (while (rfc822-looking-at ?.
)
131 (rfc822-snarf-subdomain)))
;; Collect a list of items delimited by SEPARATOR and closed by
;; TERMINATOR, calling SNARFER (via `funcall') to parse each item.
;; NAME is a human-readable description of the construct; it is
;; interpolated into the "End of addresses in middle of %s" and
;; "Gubbish in middle of %s" messages.
;; SEPARATOR and TERMINATOR are handed to `rfc822-looking-at'.
;; Each SNARFER result is stored in `tem'; when it is a list its
;; reversed elements are spliced in front of the accumulator `list'.
;; NOTE(review): the argument list continues on a line that is missing
;; from this excerpt, and the end of the body (including the return
;; value) is not visible either -- confirm against the complete file.
133 (defun rfc822-snarf-frob-list (name separator terminator snarfer
140 (format "End of addresses in middle of %s" name
)))
141 ((rfc822-looking-at terminator
)
143 ((rfc822-looking-at separator
)
144 ;; multiple separators are allowed and do nothing.
145 (while (rfc822-looking-at separator
))
151 (format "Gubbish in middle of %s" name
))))
152 (setq tem
(funcall snarfer
)
155 (setq list
(if (listp tem
)
156 (nconc (reverse tem
) list
)
160 ;; return either an address (a string) or a list of addresses
;; Parse one rfc822 `address' starting at point; the grammar being
;; recognised is summarised in the comment block right below.
;; `address-start' is bound to point on entry so that
;; `rfc822-bad-address' can find the start of the offending text.
;; ALLOW-GROUPS, when nil, makes a ":" after the leading words an error
;; ("A group name may not appear here"); the group branch parses its
;; members by passing this same function to `rfc822-snarf-frob-list'.
;; Visible outcomes:
;;  - a fast path for simple addresses throws the matched substring to
;;    the `address' catch tag;
;;  - several branches produce (buffer-substring address-start (point))
;;    or a substring of a <...> route-addr;
;;  - one branch throws nil to `address';
;;  - malformed input is reported through `rfc822-bad-address'.
;; NOTE(review): many lines of this function are missing from this
;; excerpt (for instance the ones that bind and update `n', `again',
;; `strip' and `end'), so the description above covers only what is
;; visible -- confirm details against the complete file.
161 (defun rfc822-addresses-1 (&optional allow-groups
)
162 ;; Looking for an rfc822 `address'
163 ;; Either a group (1*word ":" [#mailbox] ";")
164 ;; or a mailbox (addr-spec | 1*word route-addr)
165 ;; addr-spec is (local-part "@" domain)
166 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
167 ;; local-part is (word *("." word))
168 ;; word is (atom | quoted-string)
169 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
170 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
171 ;; domain is sub-domain *("." sub-domain)
172 ;; sub-domain is domain-ref | domain-literal
173 ;; domain-literal is "[" *(dtext | quoted-pair) "]"
174 ;; dtext is "[^][\\n"
175 ;; domain-ref is atom
176 (let ((address-start (point))
179 ;; optimize common cases:
182 ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
183 ;; other common cases are:
184 ;; foo bar <foo.bar@baz.zap>
185 ;; "foo bar" <foo.bar@baz.zap>
186 ;; those aren't hacked yet.
187 (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t
)
189 (rfc822-looking-at ?
,))))
191 ;; rfc822-looking-at may have inserted a space
192 (or (bobp) (/= (preceding-char) ?\
) (delete-char -
1))
193 ;; relying on the fact that rfc822-looking-at <char>
194 ;; doesn't mung match-data
195 (throw 'address
(buffer-substring address-start
(match-end 0)))))
196 (goto-char address-start
)
;; NOTE(review): `n' is assigned on a line missing from this excerpt;
;; it looks like a count of the words consumed so far -- confirm.
198 (cond ((and (= n
1) (rfc822-looking-at ?
@))
200 (rfc822-snarf-domain)
202 (buffer-substring address-start
(point))))
203 ((rfc822-looking-at ?
:)
204 (cond ((not allow-groups
)
205 (rfc822-bad-address "A group name may not appear here"))
207 (rfc822-bad-address "No name for :...; group")))
210 ;; return a list of addresses
211 (rfc822-snarf-frob-list ":...; group" ?\
, ?\
;
212 'rfc822-addresses-1 t
)))
213 ((rfc822-looking-at ?
<)
214 (let ((start (point))
216 (cond ((rfc822-looking-at ?
>)
219 ((and (not (eobp)) (= (following-char) ?\
@))
220 ;; <@foo.bar,@baz:quux@abcd.efg>
221 (rfc822-snarf-frob-list "<...> address" ?\
, ?\
:
223 (if (rfc822-looking-at ?\
@)
224 (rfc822-snarf-domain)
226 "Gubbish in route-addr")))))
228 (or (rfc822-looking-at ?
@)
229 (rfc822-bad-address "Malformed <..@..> address"))
230 (rfc822-snarf-domain)
232 ((progn (rfc822-snarf-words) (rfc822-looking-at ?
@))
233 ; allow <foo> (losing unix seems to do this)
234 (rfc822-snarf-domain)))
236 (if (rfc822-looking-at ?\
>)
238 (buffer-substring (if strip start
(1- start
))
239 (if strip end
(1+ end
))))
240 (rfc822-bad-address "Unterminated <...> address")))))
241 ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
242 ;; this allows "." to be part of the words preceding
243 ;; an addr-spec, since many broken mailers output
244 ;; "Hern K. Herklemeyer III
245 ;; <yank@megadeath.dod.gods-own-country>"
248 (or (= n
0) (bobp) (= (preceding-char) ?\
)
252 (setq again
(or (rfc822-looking-at ?.
)
253 (looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]"))))))
255 (throw 'address nil
))
256 ((= n
1) ; allow "foo" (losing unix seems to do this)
258 (buffer-substring address-start
(point))))
260 (rfc822-bad-address "Missing comma between addresses or badly-formatted address"))
261 ((or (eobp) (= (following-char) ?
,))
262 (rfc822-bad-address "Missing comma or route-spec"))
264 (rfc822-bad-address "Strange character or missing comma")))))))
;; Entry point: extract the addresses contained in the string
;; HEADER-TEXT.
;; Fast path: when HEADER-TEXT is a single atom optionally surrounded
;; by spaces/tabs, a one-element list holding that atom is returned
;; without creating a buffer.
;; Otherwise a scratch buffer named " rfc822" is generated, with
;; `case-fold-search' made buffer-local and set to nil.  Continuation
;; lines are unfolded (a newline followed by space/tab becomes one
;; space), leading whitespace/comments are removed with
;; `rfc822-nuke-whitespace', and addresses are parsed with
;; `rfc822-addresses-1' (groups allowed) inside a `catch' for the
;; `address' tag.  Results held in `tem' are accumulated into `list',
;; either consed on or, for list results, spliced in reversed.
;; The buffer is killed at the end when `buf' is non-nil.
;; NOTE(review): the loop structure, the final return expression and
;; the form that guards the buffer cleanup are on lines missing from
;; this excerpt -- confirm against the complete file.
267 (defun rfc822-addresses (header-text)
268 (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
270 ;; Make very simple case moderately fast.
271 (list (substring header-text
(match-beginning 1) (match-end 1)))
272 (let ((buf (generate-new-buffer " rfc822")))
276 (make-local-variable 'case-fold-search
)
277 (setq case-fold-search nil
) ;For speed(?)
279 ;; unfold continuation lines
280 (goto-char (point-min))
282 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t
)
283 (replace-match "\\1 " t
))
285 (goto-char (point-min))
286 (rfc822-nuke-whitespace)
289 address-start
); this is for rfc822-bad-address
291 (setq address-start
(point))
293 (catch 'address
; this is for rfc822-bad-address
294 (cond ((rfc822-looking-at ?\
,)
296 ((looking-at "[][\000-\037\177-\377@;:\\.>)]")
299 (format "Strange character \\%c found"
302 (rfc822-addresses-1 t
)))))
305 (setq list
(cons tem list
)))
307 (setq list
(nconc (nreverse tem
) list
)))))
;; Dispose of the scratch buffer if one was created.
309 (and buf
(kill-buffer buf
))))))
313 ;;; rfc822.el ends here