1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
3 ;; Copyright (C) 1998-2013 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
25 ;; Three: Message Header Extensions for Non-ASCII Text".
31 (defvar message-posting-charset
)
35 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
37 (require 'rfc2045
) ;; rfc2045-encode-string
38 (autoload 'mm-body-7-or-8
"mm-bodies")
40 (defvar rfc2047-header-encoding-alist
41 '(("Newsgroups" . nil
)
44 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\
45 \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime
)
47 "*Header/encoding method alist.
48 The list is traversed sequentially. The keys can either be
53 1) nil, in which case no encoding is done;
54 2) `mime', in which case the header will be encoded according to RFC2047;
55 3) `address-mime', like `mime', but takes account of the rules for address
56 fields (where quoted strings and comments must be treated separately);
57 4) a charset, in which case it will be encoded as that charset;
58 5) `default', in which case the field will be encoded as the rest
61 (defvar rfc2047-charset-encoding-alist
87 "Alist of MIME charsets to RFC2047 encodings.
88 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
89 quoted-printable and base64 respectively.")
91 (defvar rfc2047-encode-function-alist
92 '((Q . rfc2047-q-encode-string
)
93 (B . rfc2047-b-encode-string
)
95 "Alist of RFC2047 encodings to encoding functions.")
;; Consulted by the encoder in this file: when non-nil,
;; `rfc2047-encode-region' uses an encodable-text pattern that also
;; matches the literal "=?", and `rfc2047-encodable-p' searches the
;; buffer for `rfc2047-encoded-word-regexp'.
97 (defvar rfc2047-encode-encoded-words t
98 "Whether encoded words should be encoded again.")
100 (defvar rfc2047-allow-irregular-q-encoded-words t
101 "*Whether to decode irregular Q-encoded words.")
103 (eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'.
104 (defconst rfc2047-encoded-word-regexp
105 "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
106 \\(B\\?[+/0-9A-Za-z]*=*\
109 "Regexp that matches encoded word."
110 ;; The patterns for the B encoding and the Q encoding, i.e. the ones
111 ;; beginning with "B" and "Q" respectively, are restricted into only
112 ;; the characters that those encodings may generally use.
114 (defconst rfc2047-encoded-word-regexp-loose
115 "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
116 \\(B\\?[+/0-9A-Za-z]*=*\
117 \\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\
119 "Regexp that matches encoded word allowing loose Q encoding."
120 ;; The pattern for the Q encoding, i.e. the one beginning with "Q",
122 ;; "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*"
123 ;; <--------1-------><----------2,3----------><--4--><-5->
125 ;; 1. After "Q?", allow "?"s that follow a character other than "=".
126 ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator.
127 ;; 3. In the middle of an encoded word, allow "?"s that follow a
128 ;; character other than "=".
129 ;; 4. Allow any characters other than "?" in the middle of an
131 ;; 5. At the end, allow "?"s.
135 ;;; Functions for encoding RFC2047 messages
138 (defun rfc2047-qp-or-base64 ()
139 "Return the type with which to encode the buffer.
140 This is either `base64' or `quoted-printable'."
142 (let ((limit (min (point-max) (+ 2000 (point-min))))
144 (goto-char (point-min))
145 (skip-chars-forward "\x20-\x7f\r\n\t" limit
)
146 (while (< (point) limit
)
149 (skip-chars-forward "\x20-\x7f\r\n\t" limit
))
150 (if (or (< (* 6 n8bit
) (- limit
(point-min)))
151 ;; Don't base64, say, a short line with a single
152 ;; non-ASCII char when splitting parts by charset.
157 (defun rfc2047-narrow-to-field ()
158 "Narrow the buffer to the header on the current line."
164 (if (re-search-forward "^[^ \n\t]" nil t
)
167 (goto-char (point-min)))
(defun rfc2047-field-value ()
  "Return the value of the field at point.
The value is the text that follows the first colon of the field
and any whitespace after it, up to the end of the field, returned
as a string without text properties.  Point and the current
narrowing are left as they were."
  ;; `rfc2047-narrow-to-field' narrows the buffer and moves point, so
  ;; both must be restored before returning to the caller.
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      ;; Skip the field name, the colon and any leading whitespace
      ;; (including a fold right after the colon).
      (re-search-forward ":[ \t\n]*" nil t)
      (buffer-substring-no-properties (point) (point-max)))))
177 (defun rfc2047-quote-special-characters-in-quoted-strings (&optional
179 "Quote special characters with `\\'s in quoted strings.
180 Quoting will not be done in a quoted string if it contains characters
181 matching ENCODABLE-REGEXP or it is within parentheses."
182 (goto-char (point-min))
183 (let ((tspecials (concat "[" ietf-drums-tspecials
"]"))
186 (with-syntax-table (standard-syntax-table)
190 (eq (char-before) ?\
)))
192 (goto-char (point-max)))
194 (narrow-to-region start
(point))
196 (while (search-forward "\"" nil t
)
197 (setq beg
(match-beginning 0))
198 (unless (eq (char-before beg
) ?
\\)
204 (setq end
(1- (point)))
206 (if (and encodable-regexp
207 (re-search-forward encodable-regexp end t
))
210 (narrow-to-region beg end
)
211 (while (re-search-forward tspecials nil
'move
)
212 (if (eq (char-before) ?
\\)
213 (if (looking-at tspecials
) ;; Already quoted.
216 (goto-char (match-beginning 0))
222 (goto-char (point-max)))
224 (setq start
(point))))))
226 (defvar rfc2047-encoding-type
'address-mime
227 "The type of encoding done by `rfc2047-encode-region'.
228 This should be dynamically bound around calls to
229 `rfc2047-encode-region' to either `mime' or `address-mime'. See
230 `rfc2047-header-encoding-alist', for definitions.")
232 (defun rfc2047-encode-message-header ()
233 "Encode the message header according to `rfc2047-header-encoding-alist'.
234 Should be called narrowed to the head of the message."
237 (goto-char (point-min))
238 (let (alist elem method charsets
)
241 (rfc2047-narrow-to-field)
243 alist rfc2047-header-encoding-alist
244 charsets
(mm-find-mime-charset-region (point-min) (point-max)))
245 ;; M$ Outlook boycotts decoding of a header if it consists
246 ;; of two or more encoded words and those charsets differ;
247 ;; it seems to decode all words in a header from a charset
248 ;; found first in the header. So, we unify the charsets into
249 ;; a single one used for encoding the whole text in a header.
250 (let ((mm-coding-system-priorities
251 (if (= (length charsets
) 1)
252 (cons (mm-charset-to-coding-system (car charsets
))
253 mm-coding-system-priorities
)
254 mm-coding-system-priorities
)))
255 (while (setq elem
(pop alist
))
256 (when (or (and (stringp (car elem
))
257 (looking-at (car elem
)))
261 (if (not (rfc2047-encodable-p))
263 (when (eq method
'address-mime
)
264 (rfc2047-quote-special-characters-in-quoted-strings))
265 (if (and (eq (mm-body-7-or-8) '8bit
)
268 (car message-posting-charset
)))
269 ;; 8 bit must be decoded.
270 (mm-encode-coding-region
271 (point-min) (point-max)
272 (mm-charset-to-coding-system
273 (car message-posting-charset
))))
274 ;; No encoding necessary, but folding is nice
278 (goto-char (point-min))
279 (skip-chars-forward "^:")
280 (when (looking-at ": ")
284 ;; We found something that may perhaps be encoded.
285 (re-search-forward "^[^:]+: *" nil t
)
287 ((eq method
'address-mime
)
288 (rfc2047-encode-region (point) (point-max)))
290 (let ((rfc2047-encoding-type 'mime
))
291 (rfc2047-encode-region (point) (point-max))))
292 ((eq method
'default
)
293 (if (and (featurep 'mule
)
294 (if (boundp 'enable-multibyte-characters
)
295 (default-value 'enable-multibyte-characters
))
297 (mm-encode-coding-region (point) (point-max)
298 mail-parse-charset
)))
299 ;; We get this when CC'ing messages to newsgroups with
300 ;; 8-bit names. The group name mail copy just got
301 ;; unconditionally encoded. Previously, it would ask
302 ;; whether to encode, which was quite confusing for the
303 ;; user. If the new behavior is wrong, tell me. I have
304 ;; left the old code commented out below.
305 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
306 ;; Modified by Dave Love, with the commented-out code changed
307 ;; in accordance with changes elsewhere.
309 (rfc2047-encode-region (point) (point-max)))
311 ;;; (if (or (message-options-get
312 ;;; 'rfc2047-encode-message-header-encode-any)
313 ;;; (message-options-set
314 ;;; 'rfc2047-encode-message-header-encode-any
316 ;;; "Some texts are not encoded. Encode anyway?")))
317 ;;; (rfc2047-encode-region (point-min) (point-max))
318 ;;; (error "Cannot send unencoded text")))
319 ((mm-coding-system-p method
)
320 (if (or (and (featurep 'mule
)
321 (if (boundp 'enable-multibyte-characters
)
322 (default-value 'enable-multibyte-characters
)))
323 (featurep 'file-coding
))
324 (mm-encode-coding-region (point) (point-max) method
)))
327 (goto-char (point-max))))))))
329 ;; Fixme: This, and the require below may not be the Right Thing, but
330 ;; should be safe just before release. -- fx 2001-02-08
332 (defun rfc2047-encodable-p ()
333 "Return non-nil if any characters in current buffer need encoding in headers.
334 The buffer may be narrowed."
335 (require 'message
) ; for message-posting-charset
337 (mm-find-mime-charset-region (point-min) (point-max))))
338 (goto-char (point-min))
339 (or (and rfc2047-encode-encoded-words
341 (re-search-forward rfc2047-encoded-word-regexp nil t
)
342 (goto-char (point-min))))
344 (not (equal charsets
(list (car message-posting-charset
))))))))
346 ;; Use this syntax table when parsing into regions that may need
347 ;; encoding. Double quotes are string delimiters, backslash is
348 ;; character quoting, and all other RFC 2822 special characters are
349 ;; treated as punctuation so we can use forward-sexp/forward-word to
350 ;; skip to the end of regions appropriately. Nb. ietf-drums does
351 ;; things differently.
352 (defconst rfc2047-syntax-table
353 ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
354 (let ((table (make-syntax-table)))
355 ;; The following is done to work for setting all elements of the table;
356 ;; it appears to be the cleanest way.
357 ;; Play safe and don't assume the form of the word syntax entry --
359 (if (featurep 'xemacs
)
360 (put-char-table t
(get-char-table ?a
(standard-syntax-table)) table
)
361 (set-char-table-range table t
(aref (standard-syntax-table) ?a
)))
362 (modify-syntax-entry ?
\\ "\\" table
)
363 (modify-syntax-entry ?
\" "\"" table
)
364 (modify-syntax-entry ?\
( "(" table
)
365 (modify-syntax-entry ?\
) ")" table
)
366 (modify-syntax-entry ?\
< "." table
)
367 (modify-syntax-entry ?\
> "." table
)
368 (modify-syntax-entry ?\
[ "." table
)
369 (modify-syntax-entry ?\
] "." table
)
370 (modify-syntax-entry ?
: "." table
)
371 (modify-syntax-entry ?\
; "." table)
372 (modify-syntax-entry ?
, "." table
)
373 (modify-syntax-entry ?
@ "." table
)
376 (defun rfc2047-encode-region (b e
&optional dont-fold
)
377 "Encode words in region B to E that need encoding.
378 By default, the region is treated as containing RFC2822 addresses.
379 Dynamically bind `rfc2047-encoding-type' to change that."
381 (narrow-to-region b e
)
382 (let ((encodable-regexp (if rfc2047-encode-encoded-words
383 "[^\000-\177]+\\|=\\?"
385 start
; start of current token
387 ;; Whether there's an encoded word before the current token,
388 ;; either immediately or separated by space.
390 (orig-text (buffer-substring-no-properties b e
)))
391 (if (eq 'mime rfc2047-encoding-type
)
392 ;; Simple case. Continuous words in which all those contain
393 ;; non-ASCII characters are encoded collectively. Encoding
394 ;; ASCII words, including `Re:' used in Subject headers, is
395 ;; avoided for interoperability with non-MIME clients and
396 ;; for making it easy to find keywords.
398 (goto-char (point-min))
399 (while (progn (skip-chars-forward " \t\n")
402 (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)")
404 (setq end
(match-end 0))
405 (re-search-forward encodable-regexp end t
)))
407 (if (> (point) start
)
408 (rfc2047-encode start
(point))
410 ;; `address-mime' case -- take care of quoted words, comments.
411 (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp
)
412 (with-syntax-table rfc2047-syntax-table
413 (goto-char (point-min))
414 (condition-case err
; in case of unbalanced quotes
415 ;; Look for rfc2822-style: sequences of atoms, quoted
416 ;; strings, specials, whitespace. (Specials mustn't be
420 (skip-chars-forward " \t\n")
423 ((not (char-after))) ; eob
425 ((eq ?
\" (setq csyntax
(char-syntax (char-after))))
429 ;; Does it need encoding?
431 (if (re-search-forward encodable-regexp end
'move
)
432 ;; It needs encoding. Strip the quotes first,
433 ;; since encoded words can't occur in quotes.
440 ;; There was a preceding quoted word. We need
441 ;; to include any separating whitespace in this
442 ;; word to avoid it getting lost.
443 (skip-chars-backward " \t")
444 ;; A space is needed between the encoded words.
448 ;; Adjust the end position for the deleted quotes.
449 (rfc2047-encode start
(- end
2))
450 (setq last-encoded t
)) ; record that it was encoded
451 (setq last-encoded nil
)))
453 ;; Skip other delimiters, but record that they've
454 ;; potentially separated quoted words.
456 (setq last-encoded nil
))
458 (error "Unbalanced parentheses"))
460 ;; Look for the end of parentheses.
462 ;; Encode text as an unstructured field.
463 (let ((rfc2047-encoding-type 'mime
))
464 (rfc2047-encode-region (1+ start
) (1- (point))))
465 (skip-chars-forward ")"))
466 (t ; normal token/whitespace sequence
468 ;; Skip one ASCII word, or encode continuous words
469 ;; in which all those contain non-ASCII characters.
471 (while (not (or end
(eobp)))
472 (when (looking-at "[\000-\177]+")
476 (while (and (or (re-search-forward
477 "[ \t\n]\\|\\Sw" end
'move
)
479 (eq ?
\\ (char-syntax (char-before))))
480 ;; Skip backslash-quoted characters.
483 (setq end
(match-beginning 0))
484 (if rfc2047-encode-encoded-words
487 (when (search-forward "=?" end
'move
)
488 (goto-char (match-beginning 0))
491 ;; Where the value nil of `end' means there may be
492 ;; text to have to be encoded following the point.
493 ;; Otherwise, the point reached to the end of ASCII
494 ;; words separated by whitespace or a special char.
496 (when (looking-at encodable-regexp
)
497 (goto-char (setq begin
(match-end 0)))
498 (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)")
499 (setq end
(match-end 0))
501 (while (re-search-forward
502 encodable-regexp end t
))
505 (or (not (re-search-forward "\\Sw" end t
))
507 (goto-char (match-beginning 0))
510 (when (looking-at "[^ \t\n]+")
511 (setq end
(match-end 0))
512 (if (re-search-forward "\\Sw+" end t
)
513 ;; There are special characters better
514 ;; to be encoded so that MTAs may parse
516 (cond ((= end
(point)))
517 ((looking-at (concat "\\sw*\\("
522 (goto-char (1- (match-end 0)))
523 (unless (= (point) (match-beginning 0))
524 ;; Separate encodable text and
528 (skip-chars-forward " \t\n")
529 (if (and (looking-at "[^ \t\n]+")
530 (string-match encodable-regexp
533 (goto-char end
)))))))
534 (skip-chars-backward " \t\n")
537 (if (re-search-forward encodable-regexp end
'move
)
539 (unless (memq (char-before start
) '(nil ?
\t ?
))
542 (skip-chars-backward "^ \t\n")
543 (and (looking-at "\\Sw+")
544 (= (match-end 0) start
)))
545 ;; Also encode bogus delimiters.
547 ;; Separate encodable text and delimiter.
550 (setq start
(1+ start
)
552 (rfc2047-encode start end
)
553 (setq last-encoded t
))
554 (setq last-encoded nil
)))))
556 (if (or debug-on-quit debug-on-error
)
557 (signal (car err
) (cdr err
))
558 (error "Invalid data for rfc2047 encoding: %s"
559 (mm-replace-in-string orig-text
"[ \t\n]+" " "))))))))
561 (rfc2047-fold-region b
(point)))
562 (goto-char (point-max))))
(defun rfc2047-encode-string (string &optional dont-fold)
  "Encode words in STRING and return the encoded string.
By default, the string is treated as containing addresses (see
`rfc2047-encoding-type').  DONT-FOLD is passed on unchanged to
`rfc2047-encode-region'."
  (mm-with-multibyte-buffer
    ;; The region encoder works on buffer text, so STRING has to be
    ;; put into the scratch buffer first ...
    (insert string)
    (rfc2047-encode-region (point-min) (point-max) dont-fold)
    ;; ... and the rewritten buffer contents are the return value.
    (buffer-string)))
574 ;; 2. Syntax of encoded-words
576 ;; While there is no limit to the length of a multiple-line header
577 ;; field, each line of a header field that contains one or more
578 ;; 'encoded-word's is limited to 76 characters.
580 ;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it.
581 (defvar rfc2047-encode-max-chars
76
582 "Maximum characters of each header line that contain encoded-words.
583 According to RFC 2047, it is 76. If it is nil, encoded-words
584 will not be folded. Too small value may cause an error. You
585 should not change this value.")
587 (defun rfc2047-encode-1 (column string cs encoder start crest tail
589 "Subroutine used by `rfc2047-encode'."
590 (cond ((string-equal string
"")
592 ((not rfc2047-encode-max-chars
)
594 (funcall encoder
(if cs
595 (mm-encode-coding-string string cs
)
598 ((>= column rfc2047-encode-max-chars
)
600 (cond ((string-match "\n[ \t]+\\'" eword
)
601 ;; Remove a superfluous empty line.
602 (setq eword
(substring eword
0 (match-beginning 0))))
603 ((string-match "(+\\'" eword
)
604 ;; Break the line before the open parenthesis.
605 (setq crest
(concat crest
(match-string 0 eword
))
606 eword
(substring eword
0 (match-beginning 0))))))
607 (rfc2047-encode-1 (length crest
) string cs encoder start
" " tail
608 (concat eword
"\n" crest
)))
611 (limit (1- (length string
)))
616 (setq next
(concat start
619 (mm-encode-coding-string
620 (substring string
0 (1+ index
))
622 (substring string
0 (1+ index
))))
624 len
(+ column
(length next
)))
625 (if (> len rfc2047-encode-max-chars
)
628 (if (or (< index limit
)
629 (<= (+ len
(or (string-match "\n" tail
)
631 rfc2047-encode-max-chars
))
634 (if (string-match "\\`)+" tail
)
635 ;; Break the line after the close parenthesis.
636 (setq tail
(concat (substring tail
0 (match-end 0))
638 (substring tail
(match-end 0)))
644 (concat eword next tail
)
647 (string-match "(+\\'" eword
))
648 (setq crest
(concat crest
(match-string 0 eword
))
649 eword
(substring eword
0 (match-beginning 0)))
650 (setq eword
(concat eword next
)))
652 eword
(concat eword next
)))
653 (when (string-match "\n[ \t]+\\'" eword
)
654 ;; Remove a superfluous empty line.
655 (setq eword
(substring eword
0 (match-beginning 0))))
656 (rfc2047-encode-1 (length crest
) (substring string index
)
657 cs encoder start
" " tail
658 (concat eword
"\n" crest
)))))))
660 (defun rfc2047-encode (b e
)
661 "Encode the word(s) in the region B to E.
662 Point moves to the end of the region."
663 (let ((mime-charset (or (mm-find-mime-charset-region b e
) (list 'us-ascii
)))
664 cs encoding tail crest eword
)
665 ;; Use utf-8 as a last resort if determining charset of text fails.
666 (if (memq nil mime-charset
)
667 (setq mime-charset
(list 'utf-8
)))
668 (cond ((> (length mime-charset
) 1)
669 (error "Can't rfc2047-encode `%s'"
670 (buffer-substring-no-properties b e
)))
671 ((= (length mime-charset
) 1)
672 (setq mime-charset
(car mime-charset
)
673 cs
(mm-charset-to-coding-system mime-charset
))
674 (unless (and (mm-multibyte-p)
675 (mm-coding-system-p cs
))
678 (narrow-to-region b e
)
680 (or (cdr (assq mime-charset
681 rfc2047-charset-encoding-alist
))
682 ;; For the charsets that don't have a preferred
683 ;; encoding, choose the one that's shorter.
684 (if (eq (rfc2047-qp-or-base64) 'base64
)
689 (skip-chars-forward "^ \t\n")
690 ;; `tail' may contain a close parenthesis.
691 (setq tail
(buffer-substring-no-properties e
(point)))
693 (setq b
(point-marker)
694 e
(set-marker (make-marker) e
))
695 (rfc2047-fold-region (point-at-bol) b
)
697 (skip-chars-backward "^ \t\n")
698 (unless (= 0 (skip-chars-backward " \t"))
699 ;; `crest' may contain whitespace and an open parenthesis.
700 (setq crest
(buffer-substring-no-properties (point) b
)))
701 (setq eword
(rfc2047-encode-1
703 (mm-replace-in-string
704 (buffer-substring-no-properties b e
)
705 "\n\\([ \t]?\\)" "\\1")
707 (or (cdr (assq encoding
708 rfc2047-encode-function-alist
))
710 (concat "=?" (downcase (symbol-name mime-charset
))
711 "?" (upcase (symbol-name encoding
)) "?")
714 (delete-region (if (eq (aref eword
0) ?
\n)
716 ;; The line was folded before encoding.
721 ;; `eword' contains `crest' and `tail'.
725 (unless (or (/= 0 (length tail
))
727 (looking-at "[ \t\n)]"))
(defun rfc2047-fold-field ()
  "Fold the current header field.
The buffer is temporarily narrowed to the field with
`rfc2047-narrow-to-field' and the whole field is handed to
`rfc2047-fold-region'.  Point and the previous narrowing are
restored afterwards."
  ;; `rfc2047-narrow-to-field' narrows and moves point; undo both on exit.
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (rfc2047-fold-region (point-min) (point-max)))))
739 (defun rfc2047-fold-region (b e
)
740 "Fold long lines in region B to E."
742 (narrow-to-region b e
)
743 (goto-char (point-min))
747 (bol (save-restriction
751 (when (and (or break qword-break
)
752 (> (- (point) bol
) 76))
753 (goto-char (or break qword-break
))
756 (skip-chars-backward " \t")
757 (if (looking-at "[ \t]")
760 (setq bol
(1- (point)))
761 ;; Don't break before the first non-LWSP characters.
762 (skip-chars-forward " \t")
766 ((eq (char-after) ?
\n)
771 (skip-chars-forward " \t")
772 (unless (or (eobp) (eq (char-after) ?
\n))
774 ((eq (char-after) ?
\r)
776 ((memq (char-after) '(? ?
\t))
777 (skip-chars-forward " \t")
778 (unless first
;; Don't break just after the header name.
779 (setq break
(point))))
781 (if (not (looking-at "=\\?[^=]"))
782 (if (eq (char-after) ?
=)
784 (skip-chars-forward "^ \t\n\r="))
785 ;; Don't break at the start of the field.
786 (unless (= (point) b
)
787 (setq qword-break
(point)))
788 (skip-chars-forward "^ \t\n\r")))
790 (skip-chars-forward "^ \t\n\r")))
792 (when (and (or break qword-break
)
793 (> (- (point) bol
) 76))
794 (goto-char (or break qword-break
))
797 (if (or (> 0 (skip-chars-backward " \t"))
798 (looking-at "[ \t]"))
801 (setq bol
(1- (point)))
802 ;; Don't break before the first non-LWSP characters.
803 (skip-chars-forward " \t")
805 (forward-char 1))))))
(defun rfc2047-unfold-field ()
  "Unfold the current header field.
The buffer is temporarily narrowed to the field with
`rfc2047-narrow-to-field' and the whole field is handed to
`rfc2047-unfold-region'.  Point and the previous narrowing are
restored afterwards."
  ;; `rfc2047-narrow-to-field' narrows and moves point; undo both on exit.
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (rfc2047-unfold-region (point-min) (point-max)))))
814 (defun rfc2047-unfold-region (b e
)
815 "Unfold lines in region B to E."
817 (narrow-to-region b e
)
818 (goto-char (point-min))
819 (let ((bol (save-restriction
822 (eol (point-at-eol)))
825 (if (and (looking-at "[ \t]")
826 (< (- (point-at-eol) bol
) 76))
827 (delete-region eol
(progn
829 (skip-chars-forward "\r\n")
831 (setq bol
(point-at-bol)))
832 (setq eol
(point-at-eol))
(defun rfc2047-b-encode-string (string)
  "Return the header text STRING encoded as base64.
The result is produced on a single line, without line breaks."
  (let ((no-line-break t))
    (base64-encode-string string no-line-break)))
839 (autoload 'quoted-printable-encode-region
"qp")
(defun rfc2047-q-encode-string (string)
  "Quoted-printable-encode the header in STRING and return the result.
Characters outside the safe class given below are replaced by
their `=XX' form, and every space that is left is then turned
into an underscore."
  (mm-with-unibyte-buffer
    ;; The encoder works on buffer text, so load STRING first.
    (insert string)
    (quoted-printable-encode-region
     (point-min) (point-max) nil
     ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
     ;; Avoid using 8bit characters.
     ;; This list excludes `especials' (see the RFC2047 syntax),
     ;; meaning that some characters in non-structured fields will
     ;; get encoded when they don't need to be.  The following is
     ;; what it used to be.
     ;;;  ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
     ;;;  "\010\012\014\040-\074\076\100-\136\140-\177")
     "-\b\n\f !#-'*+0-9A-Z\\^`-~\d")
    ;; Spaces are in the safe class above, so they survive the QP
    ;; pass and are rewritten as underscores here.
    (subst-char-in-region (point-min) (point-max) ?\s ?_)
    (buffer-string)))
(defun rfc2047-encode-parameter (param value)
  "Return a PARAM=VALUE string encoded in the RFC2047-like style.
This stands in for `rfc2231-encode-string', which is the standard
way to encode parameters but is not supported by many mailers."
  ;; Encode VALUE as unstructured (`mime') text with folding of
  ;; encoded words switched off, then let rfc2045 build the pair.
  (let ((rfc2047-encoding-type 'mime)
        (rfc2047-encode-max-chars nil))
    (let ((encoded-value (rfc2047-encode-string value t)))
      (rfc2045-encode-string param encoded-value))))
868 ;;; Functions for decoding RFC2047 messages
871 (defvar rfc2047-quote-decoded-words-containing-tspecials nil
872 "If non-nil, quote decoded words containing special characters.")
874 (defvar rfc2047-allow-incomplete-encoded-text t
875 "*Non-nil means allow incomplete encoded-text in successive encoded-words.
876 Dividing of encoded-text in the place other than character boundaries
877 violates RFC2047 section 5, while we have a capability to decode it.
878 If it is non-nil, the decoder will decode B- or Q-encoding in each
879 encoded-word, concatenate them, and decode it by charset. Otherwise,
880 the decoder will fully decode each encoded-word before concatenating
883 (defun rfc2047-strip-backslashes-in-quoted-strings ()
884 "Strip backslashes in quoted strings. `\\\"' remains."
885 (goto-char (point-min))
887 (with-syntax-table (standard-syntax-table)
888 (while (search-forward "\"" nil t
)
889 (unless (eq (char-before) ?
\\)
890 (setq beg
(match-end 0))
891 (goto-char (match-beginning 0))
896 (narrow-to-region beg
(1- (point)))
898 (while (search-forward "\\" nil
'move
)
899 (unless (memq (char-after) '(?
\"))
904 (goto-char beg
))))))))
906 (defun rfc2047-charset-to-coding-system (charset &optional allow-override
)
907 "Return coding-system corresponding to MIME CHARSET.
908 If your Emacs implementation can't decode CHARSET, return nil.
910 If allow-override is given, use `mm-charset-override-alist' to
911 map undesired charset names to their replacement. This should
912 only be used for decoding, not for encoding."
913 (when (stringp charset
)
914 (setq charset
(intern (downcase charset
))))
915 (when (or (not charset
)
916 (eq 'gnus-all mail-parse-ignored-charsets
)
917 (memq 'gnus-all mail-parse-ignored-charsets
)
918 (memq charset mail-parse-ignored-charsets
))
919 (setq charset mail-parse-charset
))
920 (let ((cs (mm-charset-to-coding-system charset nil allow-override
)))
921 (cond ((eq cs
'ascii
)
922 (setq cs
(or (mm-charset-to-coding-system mail-parse-charset
)
924 ((mm-coding-system-p cs
))
926 (listp mail-parse-ignored-charsets
)
927 (memq 'gnus-unknown mail-parse-ignored-charsets
))
928 (setq cs
(mm-charset-to-coding-system mail-parse-charset
))))
933 (autoload 'quoted-printable-decode-string
"qp")
935 (defun rfc2047-decode-encoded-words (words)
936 "Decode successive encoded-words in WORDS and return a decoded string.
937 Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT
939 (let (word charset cs encoding text rest
)
941 (setq word
(pop words
))
942 (if (and (setq cs
(rfc2047-charset-to-coding-system
943 (setq charset
(car word
)) t
))
945 (cond ((char-equal ?B
(nth 1 word
))
946 (setq text
(base64-decode-string
947 (rfc2047-pad-base64 (nth 2 word
)))))
948 ((char-equal ?Q
(nth 1 word
))
949 (setq text
(quoted-printable-decode-string
950 (mm-subst-char-in-string
951 ?_ ?
(nth 2 word
) t
)))))
953 (message "%s" (error-message-string code
))
955 (if (and rfc2047-allow-incomplete-encoded-text
957 ;; Concatenate text of which the charset is the same.
958 (setcdr (car rest
) (concat (cdar rest
) text
))
959 (push (cons cs text
) rest
))
960 ;; Don't decode encoded-word.
961 (push (cons nil
(nth 3 word
)) rest
)))
964 (or (and (setq cs
(caar rest
))
966 (mm-decode-coding-string (cdar rest
) cs
)
968 (message "%s" (error-message-string code
))
970 (concat (when (cdr rest
) " ")
973 (not (eq (string-to-char words
) ?
)))
979 ;; Fixme: This should decode in place, not cons intermediate strings.
980 ;; Also check whether it needs to worry about delimiting fields like
983 ;; In fact it's reported that (invalid) encoding of mailboxes in
984 ;; addr-specs is in use, so delimiting fields might help. Probably
985 ;; not decoding a word which isn't properly delimited is good enough
986 ;; and worthwhile (is it more correct or not?), e.g. something like
987 ;; `=?iso-8859-1?q?foo?=@'.
989 (defun rfc2047-decode-region (start end
&optional address-mime
)
990 "Decode MIME-encoded words in region between START and END.
991 If ADDRESS-MIME is non-nil, strip backslashes which precede characters
992 other than `\"' and `\\' in quoted strings."
994 (let ((case-fold-search t
)
996 (if rfc2047-allow-irregular-q-encoded-words
998 (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp-loose
"\\)"))
1000 (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp
"\\)"))))
1004 (narrow-to-region start end
)
1006 (rfc2047-strip-backslashes-in-quoted-strings))
1007 (goto-char (setq b start
))
1008 ;; Look for the encoded-words.
1009 (while (setq match
(re-search-forward eword-regexp nil t
))
1010 (setq e
(match-beginning 1)
1014 (push (list (match-string 2) ;; charset
1015 (char-after (match-beginning 3)) ;; encoding
1016 (substring (match-string 3) 2) ;; encoded-text
1017 (match-string 1)) ;; encoded-word
1019 ;; Look for the subsequent encoded-words.
1020 (when (setq match
(looking-at eword-regexp
))
1021 (goto-char (setq end
(match-end 0)))))
1022 ;; Replace the encoded-words with the decoded one.
1023 (delete-region e end
)
1024 (insert (rfc2047-decode-encoded-words (nreverse words
)))
1026 (narrow-to-region e
(point))
1028 ;; Remove newlines between decoded words, though such
1029 ;; things essentially must not be there.
1030 (while (re-search-forward "[\n\r]+" nil t
)
1031 (replace-match " "))
1032 (setq end
(point-max))
1033 ;; Quote decoded words if there are special characters
1034 ;; which might violate RFC2822.
1035 (when (and rfc2047-quote-decoded-words-containing-tspecials
1036 (let ((regexp (car (rassq
1038 rfc2047-header-encoding-alist
))))
1043 ;; Don't quote words if already quoted.
1044 (not (and (eq (char-before e
) ?
\")
1045 (eq (char-after end
) ?
\")))
1048 (while (and (memq (char-after) '(? ?
\t))
1049 (zerop (forward-line -
1))))
1050 (looking-at regexp
)))))))
1053 (skip-chars-forward " \t")
1054 (setq start
(point))
1055 (setq quoted
(eq (char-after) ?
\"))
1056 (goto-char (point-max))
1057 (skip-chars-backward " \t" start
)
1058 (if (setq quoted
(and quoted
1059 (> (point) (1+ start
))
1060 (eq (char-before) ?
\")))
1063 (setq start
(1+ start
)
1064 end
(point-marker)))
1065 (setq end
(point-marker)))
1067 (while (search-forward "\"" end t
)
1070 (zerop (%
(skip-chars-backward "\\\\") 2))
1071 (goto-char (match-beginning 0)))
1074 (when (and (not quoted
)
1078 (concat "[" ietf-drums-tspecials
"]")
1084 (set-marker end nil
)))
1085 (goto-char (point-max)))
1086 (when (and (mm-multibyte-p)
1088 (not (eq mail-parse-charset
'us-ascii
))
1089 (not (eq mail-parse-charset
'gnus-decoded
)))
1090 (mm-decode-coding-region b e mail-parse-charset
))
1092 (when (and (mm-multibyte-p)
1094 (not (eq mail-parse-charset
'us-ascii
))
1095 (not (eq mail-parse-charset
'gnus-decoded
)))
1096 (mm-decode-coding-region b
(point-max) mail-parse-charset
))))))
(defun rfc2047-decode-address-region (start end)
  "Decode MIME-encoded words in the region between START and END.
The region is treated as an address field: backslashes which
precede characters other than `\"' and `\\' in quoted strings are
stripped."
  (let ((treat-as-address t))
    (rfc2047-decode-region start end treat-as-address)))
1104 (defun rfc2047-decode-string (string &optional address-mime
)
1105 "Decode MIME-encoded STRING and return the result.
1106 If ADDRESS-MIME is non-nil, strip backslashes which precede characters
1107 other than `\"' and `\\' in quoted strings."
1108 ;; (let ((m (mm-multibyte-p)))
1109 (if (string-match "=\\?" string
)
1111 ;; We used to only call mm-enable-multibyte if `m' is non-nil,
1112 ;; but this can't be the right criterion. Don't just revert this
1113 ;; change if it encounters a bug. Please help me fix it
1114 ;; right instead. --Stef
1115 ;; The string returned should always be multibyte in a multibyte
1116 ;; session, i.e. the buffer should be multibyte before
1117 ;; `buffer-string' is called.
1118 (mm-enable-multibyte)
1121 (rfc2047-decode-region (point-min) (point-max) address-mime
))
1126 (when (mm-multibyte-string-p string
)
1127 (mm-enable-multibyte))
1129 (rfc2047-strip-backslashes-in-quoted-strings)
1131 ;; Fixme: As above, `m' here is inappropriate.
1134 (not (eq mail-parse-charset
'us-ascii
))
1135 (not (eq mail-parse-charset
'gnus-decoded
)))
1136 ;; `decode-coding-string' in Emacs offers a third optional
1137 ;; arg NOCOPY to avoid consing a new string if the decoding
1138 ;; is "trivial". Unfortunately it currently doesn't
1139 ;; consider anything else than a `nil' coding system
1141 ;; `rfc2047-decode-string' is called multiple times for each
1142 ;; article during summary buffer generation, and we really
1143 ;; want to avoid unnecessary consing. So we bypass
1144 ;; `decode-coding-string' if the string is purely ASCII.
1145 (if (and (fboundp 'detect-coding-string
)
1146 ;; string is purely ASCII
1147 (eq (detect-coding-string string t
) 'undecided
))
1149 (mm-decode-coding-string string mail-parse-charset
))
1150 (mm-string-to-multibyte string
)))) ;; )
(defun rfc2047-decode-address-string (string)
  "Decode the MIME-encoded STRING and return the result.
STRING is treated as an address field: backslashes which precede
characters other than `\"' and `\\' in quoted strings are
stripped."
  (let ((treat-as-address t))
    (rfc2047-decode-string string treat-as-address)))
(defun rfc2047-pad-base64 (string)
  "Pad STRING to quartets.
Return STRING unchanged when its length is already a multiple of
four.  Otherwise drop any trailing `=' characters and append as
many `=' as are needed to reach a multiple of four.  A remainder
of one character cannot be valid base64, so such a string is
returned without padding."
  ;; Be more liberal to accept buggy base64 strings.  If
  ;; base64-decode-string accepts buggy strings, this function could
  ;; be aliased to identity.
  (if (zerop (mod (length string) 4))
      string
    (let* ((stripped (if (string-match "=+\\'" string)
                         ;; Wrong amount of padding: start over from
                         ;; the bare payload.
                         (substring string 0 (match-beginning 0))
                       string))
           (remainder (mod (length stripped) 4)))
      (cond ((= remainder 2) (concat stripped "=="))
            ((= remainder 3) (concat stripped "="))
            ;; 0: already whole quartets; 1: error, don't pad it.
            (t stripped)))))
1175 ;;; rfc2047.el ends here