1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
3 ;; Copyright (C) 1998-2016 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; This file is part of GNU Emacs.
9 ;; GNU Emacs is free software: you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation, either version 3 of the License, or
12 ;; (at your option) any later version.
14 ;; GNU Emacs is distributed in the hope that it will be useful,
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;; GNU General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
25 ;; Three: Message Header Extensions for Non-ASCII Text".
31 (defvar message-posting-charset
)
35 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
37 (require 'rfc2045
) ;; rfc2045-encode-string
38 (autoload 'mm-body-7-or-8
"mm-bodies")
45 (defcustom rfc2047-header-encoding-alist
46 '(("Newsgroups" . nil
)
49 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\
50 \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime
)
52 "Header/encoding method alist.
53 The list is traversed sequentially. The keys can either be
58 1) nil, in which case no encoding is done;
59 2) `mime', in which case the header will be encoded according to RFC2047;
60 3) `address-mime', like `mime', but takes account of the rules for address
61 fields (where quoted strings and comments must be treated separately);
62 4) a charset, in which case it will be encoded as that charset;
63 5) `default', in which case the field will be encoded as the rest
65 :type
'(alist :key-type
(choice regexp
(const t
))
66 :value-type
(choice (const nil
) (const mime
)
71 (defvar rfc2047-charset-encoding-alist
97 "Alist of MIME charsets to RFC2047 encodings.
98 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
99 quoted-printable and base64 respectively.")
101 (defvar rfc2047-encode-function-alist
102 '((Q . rfc2047-q-encode-string
)
103 (B . rfc2047-b-encode-string
)
105 "Alist of RFC2047 encodings to encoding functions.")
107 (defvar rfc2047-encode-encoded-words t
108 "Whether encoded words should be encoded again.")
110 (defcustom rfc2047-allow-irregular-q-encoded-words t
111 "Whether to decode irregular Q-encoded words."
114 (eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'.
115 (defconst rfc2047-encoded-word-regexp
116 "=\\?\\([^][\000-\040()<>@,;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
117 \\(B\\?[+/0-9A-Za-z]*=*\
120 "Regexp that matches encoded word."
121 ;; The patterns for the B encoding and the Q encoding, i.e. the ones
122 ;; beginning with "B" and "Q" respectively, are restricted into only
123 ;; the characters that those encodings may generally use.
125 (defconst rfc2047-encoded-word-regexp-loose
126 "=\\?\\([^][\000-\040()<>@,;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
127 \\(B\\?[+/0-9A-Za-z]*=*\
128 \\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\
130 "Regexp that matches encoded word allowing loose Q encoding."
131 ;; The pattern for the Q encoding, i.e. the one beginning with "Q",
133 ;; "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*"
134 ;; <--------1-------><----------2,3----------><--4--><-5->
136 ;; 1. After "Q?", allow "?"s that follow a character other than "=".
137 ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator.
138 ;; 3. In the middle of an encoded word, allow "?"s that follow a
139 ;; character other than "=".
140 ;; 4. Allow any characters other than "?" in the middle of an
142 ;; 5. At the end, allow "?"s.
146 ;;; Functions for encoding RFC2047 messages
149 (defun rfc2047-qp-or-base64 ()
150 "Return the type with which to encode the buffer.
151 This is either `base64' or `quoted-printable'."
153 (let ((limit (min (point-max) (+ 2000 (point-min))))
155 (goto-char (point-min))
156 (skip-chars-forward "\x20-\x7f\r\n\t" limit
)
157 (while (< (point) limit
)
160 (skip-chars-forward "\x20-\x7f\r\n\t" limit
))
161 (if (or (< (* 6 n8bit
) (- limit
(point-min)))
162 ;; Don't base64, say, a short line with a single
163 ;; non-ASCII char when splitting parts by charset.
168 (defun rfc2047-narrow-to-field ()
169 "Narrow the buffer to the header on the current line."
175 (if (re-search-forward "^[^ \n\t]" nil t
)
178 (goto-char (point-min)))
180 (defun rfc2047-field-value ()
181 "Return the value of the field at point."
184 (rfc2047-narrow-to-field)
185 (re-search-forward ":[ \t\n]*" nil t
)
186 (buffer-substring-no-properties (point) (point-max)))))
188 (defun rfc2047-quote-special-characters-in-quoted-strings (&optional
190 "Quote special characters with `\\'s in quoted strings.
191 Quoting will not be done in a quoted string if it contains characters
192 matching ENCODABLE-REGEXP or it is within parentheses."
193 (goto-char (point-min))
194 (let ((tspecials (concat "[" ietf-drums-tspecials
"]"))
197 (with-syntax-table (standard-syntax-table)
201 (eq (char-before) ?\
)))
203 (goto-char (point-max)))
205 (narrow-to-region start
(point))
207 (while (search-forward "\"" nil t
)
208 (setq beg
(match-beginning 0))
209 (unless (eq (char-before beg
) ?
\\)
215 (setq end
(1- (point)))
217 (if (and encodable-regexp
218 (re-search-forward encodable-regexp end t
))
221 (narrow-to-region beg end
)
222 (while (re-search-forward tspecials nil
'move
)
223 (if (eq (char-before) ?
\\)
224 (if (looking-at tspecials
) ;; Already quoted.
227 (goto-char (match-beginning 0))
233 (goto-char (point-max)))
235 (setq start
(point))))))
237 (defvar rfc2047-encoding-type
'address-mime
238 "The type of encoding done by `rfc2047-encode-region'.
239 This should be dynamically bound around calls to
240 `rfc2047-encode-region' to either `mime' or `address-mime'. See
241 `rfc2047-header-encoding-alist', for definitions.")
243 (defun rfc2047-encode-message-header ()
244 "Encode the message header according to `rfc2047-header-encoding-alist'.
245 Should be called narrowed to the head of the message."
248 (goto-char (point-min))
249 (let (alist elem method charsets
)
252 (rfc2047-narrow-to-field)
254 alist rfc2047-header-encoding-alist
255 charsets
(mm-find-mime-charset-region (point-min) (point-max)))
256 ;; M$ Outlook boycotts decoding of a header if it consists
257 ;; of two or more encoded words and those charsets differ;
258 ;; it seems to decode all words in a header from a charset
259 ;; found first in the header. So, we unify the charsets into
260 ;; a single one used for encoding the whole text in a header.
261 (let ((mm-coding-system-priorities
262 (if (= (length charsets
) 1)
263 (cons (mm-charset-to-coding-system (car charsets
))
264 mm-coding-system-priorities
)
265 mm-coding-system-priorities
)))
266 (while (setq elem
(pop alist
))
267 (when (or (and (stringp (car elem
))
268 (looking-at (car elem
)))
272 (if (not (rfc2047-encodable-p))
274 (when (eq method
'address-mime
)
275 (rfc2047-quote-special-characters-in-quoted-strings))
276 (if (and (eq (mm-body-7-or-8) '8bit
)
279 (car message-posting-charset
)))
280 ;; 8 bit must be decoded.
281 (encode-coding-region
282 (point-min) (point-max)
283 (mm-charset-to-coding-system
284 (car message-posting-charset
))))
285 ;; No encoding necessary, but folding is nice
289 (goto-char (point-min))
290 (skip-chars-forward "^:")
291 (when (looking-at ": ")
295 ;; We found something that may perhaps be encoded.
296 (re-search-forward "^[^:]+: *" nil t
)
298 ((eq method
'address-mime
)
299 (rfc2047-encode-region (point) (point-max)))
301 (let ((rfc2047-encoding-type 'mime
))
302 (rfc2047-encode-region (point) (point-max))))
303 ((eq method
'default
)
304 (if (and (default-value 'enable-multibyte-characters
)
306 (encode-coding-region (point) (point-max)
307 mail-parse-charset
)))
308 ;; We get this when CC'ing messages to newsgroups with
309 ;; 8-bit names. The group name mail copy just got
310 ;; unconditionally encoded. Previously, it would ask
311 ;; whether to encode, which was quite confusing for the
312 ;; user. If the new behavior is wrong, tell me. I have
313 ;; left the old code commented out below.
314 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
315 ;; Modified by Dave Love, with the commented-out code changed
316 ;; in accordance with changes elsewhere.
318 (rfc2047-encode-region (point) (point-max)))
320 ;;; (if (or (message-options-get
321 ;;; 'rfc2047-encode-message-header-encode-any)
322 ;;; (message-options-set
323 ;;; 'rfc2047-encode-message-header-encode-any
325 ;;; "Some texts are not encoded. Encode anyway?")))
326 ;;; (rfc2047-encode-region (point-min) (point-max))
327 ;;; (error "Cannot send unencoded text")))
328 ((mm-coding-system-p method
)
329 (when (default-value 'enable-multibyte-characters
)
330 (encode-coding-region (point) (point-max) method
)))
333 (goto-char (point-max))))))))
335 ;; Fixme: This, and the require below may not be the Right Thing, but
336 ;; should be safe just before release. -- fx 2001-02-08
338 (defun rfc2047-encodable-p ()
339 "Return non-nil if any characters in current buffer need encoding in headers.
340 The buffer may be narrowed."
341 (require 'message
) ; for message-posting-charset
343 (mm-find-mime-charset-region (point-min) (point-max))))
344 (goto-char (point-min))
345 (or (and rfc2047-encode-encoded-words
347 (re-search-forward rfc2047-encoded-word-regexp nil t
)
348 (goto-char (point-min))))
350 (not (equal charsets
(list (car message-posting-charset
))))))))
352 ;; Use this syntax table when parsing into regions that may need
353 ;; encoding. Double quotes are string delimiters, backslash is
354 ;; character quoting, and all other RFC 2822 special characters are
355 ;; treated as punctuation so we can use forward-sexp/forward-word to
356 ;; skip to the end of regions appropriately. Nb. ietf-drums does
357 ;; things differently.
358 (defconst rfc2047-syntax-table
359 ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
360 (let ((table (make-syntax-table)))
361 ;; The following is done to work for setting all elements of the table;
362 ;; it appears to be the cleanest way.
363 ;; Play safe and don't assume the form of the word syntax entry --
365 (set-char-table-range table t
(aref (standard-syntax-table) ?a
))
366 (modify-syntax-entry ?
\\ "\\" table
)
367 (modify-syntax-entry ?
\" "\"" table
)
368 (modify-syntax-entry ?\
( "(" table
)
369 (modify-syntax-entry ?\
) ")" table
)
370 (modify-syntax-entry ?\
< "." table
)
371 (modify-syntax-entry ?\
> "." table
)
372 (modify-syntax-entry ?\
[ "." table
)
373 (modify-syntax-entry ?\
] "." table
)
374 (modify-syntax-entry ?
: "." table
)
375 (modify-syntax-entry ?\
; "." table)
376 (modify-syntax-entry ?
, "." table
)
377 (modify-syntax-entry ?
@ "." table
)
380 (defun rfc2047-encode-region (b e
&optional dont-fold
)
381 "Encode words in region B to E that need encoding.
382 By default, the region is treated as containing RFC2822 addresses.
383 Dynamically bind `rfc2047-encoding-type' to change that."
385 (narrow-to-region b e
)
386 (let ((encodable-regexp (if rfc2047-encode-encoded-words
387 "[^\000-\177]+\\|=\\?"
389 start
; start of current token
391 ;; Whether there's an encoded word before the current token,
392 ;; either immediately or separated by space.
394 (orig-text (buffer-substring-no-properties b e
)))
395 (if (eq 'mime rfc2047-encoding-type
)
396 ;; Simple case. Continuous words in which all those contain
397 ;; non-ASCII characters are encoded collectively. Encoding
398 ;; ASCII words, including `Re:' used in Subject headers, is
399 ;; avoided for interoperability with non-MIME clients and
400 ;; for making it easy to find keywords.
402 (goto-char (point-min))
403 (while (progn (skip-chars-forward " \t\n")
406 (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)")
408 (setq end
(match-end 0))
409 (re-search-forward encodable-regexp end t
)))
411 (if (> (point) start
)
412 (rfc2047-encode start
(point))
414 ;; `address-mime' case -- take care of quoted words, comments.
415 (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp
)
416 (with-syntax-table rfc2047-syntax-table
417 (goto-char (point-min))
418 (condition-case err
; in case of unbalanced quotes
419 ;; Look for rfc2822-style: sequences of atoms, quoted
420 ;; strings, specials, whitespace. (Specials mustn't be
424 (skip-chars-forward " \t\n")
427 ((not (char-after))) ; eob
429 ((eq ?
\" (setq csyntax
(char-syntax (char-after))))
433 ;; Does it need encoding?
435 (if (re-search-forward encodable-regexp end
'move
)
436 ;; It needs encoding. Strip the quotes first,
437 ;; since encoded words can't occur in quotes.
444 ;; There was a preceding quoted word. We need
445 ;; to include any separating whitespace in this
446 ;; word to avoid it getting lost.
447 (skip-chars-backward " \t")
448 ;; A space is needed between the encoded words.
452 ;; Adjust the end position for the deleted quotes.
453 (rfc2047-encode start
(- end
2))
454 (setq last-encoded t
)) ; record that it was encoded
455 (setq last-encoded nil
)))
457 ;; Skip other delimiters, but record that they've
458 ;; potentially separated quoted words.
460 (setq last-encoded nil
))
462 (error "Unbalanced parentheses"))
464 ;; Look for the end of parentheses.
466 ;; Encode text as an unstructured field.
467 (let ((rfc2047-encoding-type 'mime
))
468 (rfc2047-encode-region (1+ start
) (1- (point))))
469 (skip-chars-forward ")"))
470 (t ; normal token/whitespace sequence
472 ;; Skip one ASCII word, or encode continuous words
473 ;; in which all those contain non-ASCII characters.
475 (while (not (or end
(eobp)))
476 (when (looking-at "[\000-\177]+")
480 (while (and (or (re-search-forward
481 "[ \t\n]\\|\\Sw" end
'move
)
483 (eq ?
\\ (char-syntax (char-before))))
484 ;; Skip backslash-quoted characters.
487 (setq end
(match-beginning 0))
488 (if rfc2047-encode-encoded-words
491 (when (search-forward "=?" end
'move
)
492 (goto-char (match-beginning 0))
495 ;; Where the value nil of `end' means there may be
496 ;; text to have to be encoded following the point.
497 ;; Otherwise, the point reached to the end of ASCII
498 ;; words separated by whitespace or a special char.
500 (when (looking-at encodable-regexp
)
501 (goto-char (setq begin
(match-end 0)))
502 (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)")
503 (setq end
(match-end 0))
505 (while (re-search-forward
506 encodable-regexp end t
))
509 (or (not (re-search-forward "\\Sw" end t
))
511 (goto-char (match-beginning 0))
514 (when (looking-at "[^ \t\n]+")
515 (setq end
(match-end 0))
516 (if (re-search-forward "\\Sw+" end t
)
517 ;; There are special characters better
518 ;; to be encoded so that MTAs may parse
520 (cond ((= end
(point)))
521 ((looking-at (concat "\\sw*\\("
526 (goto-char (1- (match-end 0)))
527 (unless (= (point) (match-beginning 0))
528 ;; Separate encodable text and
532 (skip-chars-forward " \t\n")
533 (if (and (looking-at "[^ \t\n]+")
534 (string-match encodable-regexp
537 (goto-char end
)))))))
538 (skip-chars-backward " \t\n")
541 (if (re-search-forward encodable-regexp end
'move
)
543 (unless (memq (char-before start
) '(nil ?
\t ?
))
546 (skip-chars-backward "^ \t\n")
547 (and (looking-at "\\Sw+")
548 (= (match-end 0) start
)))
549 ;; Also encode bogus delimiters.
551 ;; Separate encodable text and delimiter.
554 (setq start
(1+ start
)
556 (rfc2047-encode start end
)
557 (setq last-encoded t
))
558 (setq last-encoded nil
)))))
560 (if (or debug-on-quit debug-on-error
)
561 (signal (car err
) (cdr err
))
562 (error "Invalid data for rfc2047 encoding: %s"
563 (replace-regexp-in-string "[ \t\n]+" " " orig-text
))))))))
565 (rfc2047-fold-region b
(point)))
566 (goto-char (point-max))))
568 (defun rfc2047-encode-string (string &optional dont-fold
)
569 "Encode words in STRING.
570 By default, the string is treated as containing addresses (see
571 `rfc2047-encoding-type')."
572 (mm-with-multibyte-buffer
574 (rfc2047-encode-region (point-min) (point-max) dont-fold
)
578 ;; 2. Syntax of encoded-words
580 ;; While there is no limit to the length of a multiple-line header
581 ;; field, each line of a header field that contains one or more
582 ;; 'encoded-word's is limited to 76 characters.
584 ;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it.
585 (defvar rfc2047-encode-max-chars
76
586 "Maximum characters of each header line that contain encoded-words.
587 According to RFC 2047, it is 76. If it is nil, encoded-words
588 will not be folded. Too small value may cause an error. You
589 should not change this value.")
591 (defun rfc2047-encode-1 (column string cs encoder start crest tail
593 "Subroutine used by `rfc2047-encode'."
594 (cond ((string-equal string
"")
596 ((not rfc2047-encode-max-chars
)
598 (funcall encoder
(if cs
599 (encode-coding-string string cs
)
602 ((>= column rfc2047-encode-max-chars
)
604 (cond ((string-match "\n[ \t]+\\'" eword
)
605 ;; Remove a superfluous empty line.
606 (setq eword
(substring eword
0 (match-beginning 0))))
607 ((string-match "(+\\'" eword
)
608 ;; Break the line before the open parenthesis.
609 (setq crest
(concat crest
(match-string 0 eword
))
610 eword
(substring eword
0 (match-beginning 0))))))
611 (rfc2047-encode-1 (length crest
) string cs encoder start
" " tail
612 (concat eword
"\n" crest
)))
615 (limit (1- (length string
)))
620 (setq next
(concat start
623 (encode-coding-string
624 (substring string
0 (1+ index
))
626 (substring string
0 (1+ index
))))
628 len
(+ column
(length next
)))
629 (if (> len rfc2047-encode-max-chars
)
632 (if (or (< index limit
)
633 (<= (+ len
(or (string-match "\n" tail
)
635 rfc2047-encode-max-chars
))
638 (if (string-match "\\`)+" tail
)
639 ;; Break the line after the close parenthesis.
640 (setq tail
(concat (substring tail
0 (match-end 0))
642 (substring tail
(match-end 0)))
648 (concat eword next tail
)
651 (string-match "(+\\'" eword
))
652 (setq crest
(concat crest
(match-string 0 eword
))
653 eword
(substring eword
0 (match-beginning 0)))
654 (setq eword
(concat eword next
)))
656 eword
(concat eword next
)))
657 (when (string-match "\n[ \t]+\\'" eword
)
658 ;; Remove a superfluous empty line.
659 (setq eword
(substring eword
0 (match-beginning 0))))
660 (rfc2047-encode-1 (length crest
) (substring string index
)
661 cs encoder start
" " tail
662 (concat eword
"\n" crest
)))))))
664 (defun rfc2047-encode (b e
)
665 "Encode the word(s) in the region B to E.
666 Point moves to the end of the region."
667 (let ((mime-charset (or (mm-find-mime-charset-region b e
) (list 'us-ascii
)))
668 cs encoding tail crest eword
)
669 ;; Use utf-8 as a last resort if determining charset of text fails.
670 (if (memq nil mime-charset
)
671 (setq mime-charset
(list 'utf-8
)))
672 (cond ((> (length mime-charset
) 1)
673 (error "Can't rfc2047-encode `%s'"
674 (buffer-substring-no-properties b e
)))
675 ((= (length mime-charset
) 1)
676 (setq mime-charset
(car mime-charset
)
677 cs
(mm-charset-to-coding-system mime-charset
))
678 (unless (and (mm-multibyte-p)
679 (mm-coding-system-p cs
))
682 (narrow-to-region b e
)
684 (or (cdr (assq mime-charset
685 rfc2047-charset-encoding-alist
))
686 ;; For the charsets that don't have a preferred
687 ;; encoding, choose the one that's shorter.
688 (if (eq (rfc2047-qp-or-base64) 'base64
)
693 (skip-chars-forward "^ \t\n")
694 ;; `tail' may contain a close parenthesis.
695 (setq tail
(buffer-substring-no-properties e
(point)))
697 (setq b
(point-marker)
698 e
(set-marker (make-marker) e
))
699 (rfc2047-fold-region (point-at-bol) b
)
701 (skip-chars-backward "^ \t\n")
702 (unless (= 0 (skip-chars-backward " \t"))
703 ;; `crest' may contain whitespace and an open parenthesis.
704 (setq crest
(buffer-substring-no-properties (point) b
)))
705 (setq eword
(rfc2047-encode-1
707 (replace-regexp-in-string
708 "\n\\([ \t]?\\)" "\\1"
709 (buffer-substring-no-properties b e
))
711 (or (cdr (assq encoding
712 rfc2047-encode-function-alist
))
714 (concat "=?" (downcase (symbol-name mime-charset
))
715 "?" (upcase (symbol-name encoding
)) "?")
718 (delete-region (if (eq (aref eword
0) ?
\n)
720 ;; The line was folded before encoding.
725 ;; `eword' contains `crest' and `tail'.
729 (unless (or (/= 0 (length tail
))
731 (looking-at "[ \t\n)]"))
736 (defun rfc2047-fold-field ()
737 "Fold the current header field."
740 (rfc2047-narrow-to-field)
741 (rfc2047-fold-region (point-min) (point-max)))))
743 (defun rfc2047-fold-region (b e
)
744 "Fold long lines in region B to E."
746 (narrow-to-region b e
)
747 (goto-char (point-min))
751 (bol (save-restriction
755 (when (and (or break qword-break
)
756 (> (- (point) bol
) 76))
757 (goto-char (or break qword-break
))
760 (skip-chars-backward " \t")
761 (if (looking-at "[ \t]")
764 (setq bol
(1- (point)))
765 ;; Don't break before the first non-LWSP characters.
766 (skip-chars-forward " \t")
770 ((eq (char-after) ?
\n)
775 (skip-chars-forward " \t")
776 (unless (or (eobp) (eq (char-after) ?
\n))
778 ((eq (char-after) ?
\r)
780 ((memq (char-after) '(? ?
\t))
781 (skip-chars-forward " \t")
782 (unless first
;; Don't break just after the header name.
783 (setq break
(point))))
785 (if (not (looking-at "=\\?[^=]"))
786 (if (eq (char-after) ?
=)
788 (skip-chars-forward "^ \t\n\r="))
789 ;; Don't break at the start of the field.
790 (unless (= (point) b
)
791 (setq qword-break
(point)))
792 (skip-chars-forward "^ \t\n\r")))
794 (skip-chars-forward "^ \t\n\r")))
796 (when (and (or break qword-break
)
797 (> (- (point) bol
) 76))
798 (goto-char (or break qword-break
))
801 (if (or (> 0 (skip-chars-backward " \t"))
802 (looking-at "[ \t]"))
805 (setq bol
(1- (point)))
806 ;; Don't break before the first non-LWSP characters.
807 (skip-chars-forward " \t")
809 (forward-char 1))))))
811 (defun rfc2047-unfold-field ()
812 "Fold the current line."
815 (rfc2047-narrow-to-field)
816 (rfc2047-unfold-region (point-min) (point-max)))))
818 (defun rfc2047-unfold-region (b e
)
819 "Unfold lines in region B to E."
821 (narrow-to-region b e
)
822 (goto-char (point-min))
823 (let ((bol (save-restriction
826 (eol (point-at-eol)))
829 (if (and (looking-at "[ \t]")
830 (< (- (point-at-eol) bol
) 76))
831 (delete-region eol
(progn
833 (skip-chars-forward "\r\n")
835 (setq bol
(point-at-bol)))
836 (setq eol
(point-at-eol))
839 (defun rfc2047-b-encode-string (string)
840 "Base64-encode the header contained in STRING."
841 (base64-encode-string string t
))
843 (autoload 'quoted-printable-encode-region
"qp")
845 (defun rfc2047-q-encode-string (string)
846 "Quoted-printable-encode the header in STRING."
847 (mm-with-unibyte-buffer
849 (quoted-printable-encode-region
850 (point-min) (point-max) nil
851 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
852 ;; Avoid using 8bit characters.
853 ;; This list excludes `especials' (see the RFC2047 syntax),
854 ;; meaning that some characters in non-structured fields will
855 ;; get encoded when they con't need to be. The following is
856 ;; what it used to be.
857 ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
858 ;;; "\010\012\014\040-\074\076\100-\136\140-\177")
859 "-\b\n\f !#-'*+0-9A-Z\\^`-~\d")
860 (subst-char-in-region (point-min) (point-max) ? ?_
)
863 (defun rfc2047-encode-parameter (param value
)
864 "Return and PARAM=VALUE string encoded in the RFC2047-like style.
865 This is a substitution for the `rfc2231-encode-string' function, that
866 is the standard but many mailers don't support it."
867 (let ((rfc2047-encoding-type 'mime
)
868 (rfc2047-encode-max-chars nil
))
869 (rfc2045-encode-string param
(rfc2047-encode-string value t
))))
872 ;;; Functions for decoding RFC2047 messages
875 (defvar rfc2047-quote-decoded-words-containing-tspecials nil
876 "If non-nil, quote decoded words containing special characters.")
878 (defcustom rfc2047-allow-incomplete-encoded-text t
879 "Non-nil means allow incomplete encoded-text in successive encoded-words.
880 Dividing of encoded-text in the place other than character boundaries
881 violates RFC2047 section 5, while we have a capability to decode it.
882 If it is non-nil, the decoder will decode B- or Q-encoding in each
883 encoded-word, concatenate them, and decode it by charset. Otherwise,
884 the decoder will fully decode each encoded-word before concatenating
888 (defun rfc2047-strip-backslashes-in-quoted-strings ()
889 "Strip backslashes in quoted strings. `\\\"' remains."
890 (goto-char (point-min))
892 (with-syntax-table (standard-syntax-table)
893 (while (search-forward "\"" nil t
)
894 (unless (eq (char-before) ?
\\)
895 (setq beg
(match-end 0))
896 (goto-char (match-beginning 0))
901 (narrow-to-region beg
(1- (point)))
903 (while (search-forward "\\" nil
'move
)
904 (unless (memq (char-after) '(?
\"))
909 (goto-char beg
))))))))
911 (defun rfc2047-charset-to-coding-system (charset &optional allow-override
)
912 "Return coding-system corresponding to MIME CHARSET.
913 If your Emacs implementation can't decode CHARSET, return nil.
915 If allow-override is given, use `mm-charset-override-alist' to
916 map undesired charset names to their replacement. This should
917 only be used for decoding, not for encoding."
918 (when (stringp charset
)
919 (setq charset
(intern (downcase charset
))))
920 (when (or (not charset
)
921 (eq 'gnus-all mail-parse-ignored-charsets
)
922 (memq 'gnus-all mail-parse-ignored-charsets
)
923 (memq charset mail-parse-ignored-charsets
))
924 (setq charset mail-parse-charset
))
925 (let ((cs (mm-charset-to-coding-system charset nil allow-override
)))
926 (cond ((eq cs
'ascii
)
927 (setq cs
(or (mm-charset-to-coding-system mail-parse-charset
)
929 ((mm-coding-system-p cs
))
931 (listp mail-parse-ignored-charsets
)
932 (memq 'gnus-unknown mail-parse-ignored-charsets
))
933 (setq cs
(mm-charset-to-coding-system mail-parse-charset
))))
938 (autoload 'quoted-printable-decode-string
"qp")
940 (defun rfc2047-decode-encoded-words (words)
941 "Decode successive encoded-words in WORDS and return a decoded string.
942 Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT
944 (let (word charset cs encoding text rest
)
946 (setq word
(pop words
))
947 (if (and (setq cs
(rfc2047-charset-to-coding-system
948 (setq charset
(car word
)) t
))
950 (cond ((char-equal ?B
(nth 1 word
))
951 (setq text
(base64-decode-string
952 (rfc2047-pad-base64 (nth 2 word
)))))
953 ((char-equal ?Q
(nth 1 word
))
954 (setq text
(quoted-printable-decode-string
955 (subst-char-in-string
956 ?_ ?
(nth 2 word
) t
)))))
958 (message "%s" (error-message-string code
))
960 (if (and rfc2047-allow-incomplete-encoded-text
962 ;; Concatenate text of which the charset is the same.
963 (setcdr (car rest
) (concat (cdar rest
) text
))
964 (push (cons cs text
) rest
))
965 ;; Don't decode encoded-word.
966 (push (cons nil
(nth 3 word
)) rest
)))
969 (or (and (setq cs
(caar rest
))
971 (decode-coding-string (cdar rest
) cs
)
973 (message "%s" (error-message-string code
))
975 (concat (when (cdr rest
) " ")
978 (not (eq (string-to-char words
) ?
)))
984 ;; Fixme: This should decode in place, not cons intermediate strings.
985 ;; Also check whether it needs to worry about delimiting fields like
988 ;; In fact it's reported that (invalid) encoding of mailboxes in
989 ;; addr-specs is in use, so delimiting fields might help. Probably
990 ;; not decoding a word which isn't properly delimited is good enough
991 ;; and worthwhile (is it more correct or not?), e.g. something like
992 ;; `=?iso-8859-1?q?foo?=@'.
994 (defun rfc2047-decode-region (start end
&optional address-mime
)
995 "Decode MIME-encoded words in region between START and END.
996 If ADDRESS-MIME is non-nil, strip backslashes which precede characters
997 other than `\"' and `\\' in quoted strings."
999 (let ((case-fold-search t
)
1001 (if rfc2047-allow-irregular-q-encoded-words
1003 (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp-loose
"\\)"))
1005 (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp
"\\)"))))
1009 (narrow-to-region start end
)
1011 (rfc2047-strip-backslashes-in-quoted-strings))
1012 (goto-char (setq b start
))
1013 ;; Look for the encoded-words.
1014 (while (setq match
(re-search-forward eword-regexp nil t
))
1015 (setq e
(match-beginning 1)
1019 (push (list (match-string 2) ;; charset
1020 (char-after (match-beginning 3)) ;; encoding
1021 (substring (match-string 3) 2) ;; encoded-text
1022 (match-string 1)) ;; encoded-word
1024 ;; Look for the subsequent encoded-words.
1025 (when (setq match
(looking-at eword-regexp
))
1026 (goto-char (setq end
(match-end 0)))))
1027 ;; Replace the encoded-words with the decoded one.
1028 (delete-region e end
)
1029 (insert (rfc2047-decode-encoded-words (nreverse words
)))
1031 (narrow-to-region e
(point))
1033 ;; Remove newlines between decoded words, though such
1034 ;; things essentially must not be there.
1035 (while (re-search-forward "[\n\r]+" nil t
)
1036 (replace-match " "))
1037 (setq end
(point-max))
1038 ;; Quote decoded words if there are special characters
1039 ;; which might violate RFC2822.
1040 (when (and rfc2047-quote-decoded-words-containing-tspecials
1041 (let ((regexp (car (rassq
1043 rfc2047-header-encoding-alist
))))
1048 ;; Don't quote words if already quoted.
1049 (not (and (eq (char-before e
) ?
\")
1050 (eq (char-after end
) ?
\")))
1053 (while (and (memq (char-after) '(? ?
\t))
1054 (zerop (forward-line -
1))))
1055 (looking-at regexp
)))))))
1058 (skip-chars-forward " \t")
1059 (setq start
(point))
1060 (setq quoted
(eq (char-after) ?
\"))
1061 (goto-char (point-max))
1062 (skip-chars-backward " \t" start
)
1063 (if (setq quoted
(and quoted
1064 (> (point) (1+ start
))
1065 (eq (char-before) ?
\")))
1068 (setq start
(1+ start
)
1069 end
(point-marker)))
1070 (setq end
(point-marker)))
1072 (while (search-forward "\"" end t
)
1075 (zerop (%
(skip-chars-backward "\\\\") 2))
1076 (goto-char (match-beginning 0)))
1079 (when (and (not quoted
)
1083 (concat "[" ietf-drums-tspecials
"]")
1089 (set-marker end nil
)))
1090 (goto-char (point-max)))
1091 (when (and (mm-multibyte-p)
1093 (not (eq mail-parse-charset
'us-ascii
))
1094 (not (eq mail-parse-charset
'gnus-decoded
)))
1095 (decode-coding-region b e mail-parse-charset
))
1097 (when (and (mm-multibyte-p)
1099 (not (eq mail-parse-charset
'us-ascii
))
1100 (not (eq mail-parse-charset
'gnus-decoded
)))
1101 (decode-coding-region b
(point-max) mail-parse-charset
))))))
1103 (defun rfc2047-decode-address-region (start end
)
1104 "Decode MIME-encoded words in region between START and END.
1105 Backslashes which precede characters other than `\"' and `\\' in quoted
1106 strings are stripped."
1107 (rfc2047-decode-region start end t
))
1109 (defun rfc2047-decode-string (string &optional address-mime
)
1110 "Decode MIME-encoded STRING and return the result.
1111 If ADDRESS-MIME is non-nil, strip backslashes which precede characters
1112 other than `\"' and `\\' in quoted strings."
1113 (if (string-match "=\\?" string
)
1115 ;; We used to only call mm-enable-multibyte if `m' is non-nil,
1116 ;; but this can't be the right criterion. Don't just revert this
1117 ;; change if it encounters a bug. Please help me fix it
1118 ;; right instead. --Stef
1119 ;; The string returned should always be multibyte in a multibyte
1120 ;; session, i.e. the buffer should be multibyte before
1121 ;; `buffer-string' is called.
1122 (mm-enable-multibyte)
1125 (rfc2047-decode-region (point-min) (point-max) address-mime
))
1130 (when (multibyte-string-p string
)
1131 (mm-enable-multibyte))
1133 (rfc2047-strip-backslashes-in-quoted-strings)
1135 ;; Fixme: As above, `m' here is inappropriate.
1138 (not (eq mail-parse-charset
'us-ascii
))
1139 (not (eq mail-parse-charset
'gnus-decoded
)))
1140 ;; `decode-coding-string' in Emacs offers a third optional
1141 ;; arg NOCOPY to avoid consing a new string if the decoding
1142 ;; is "trivial". Unfortunately it currently doesn't
1143 ;; consider anything else than a nil coding system
1145 ;; `rfc2047-decode-string' is called multiple times for each
1146 ;; article during summary buffer generation, and we really
1147 ;; want to avoid unnecessary consing. So we bypass
1148 ;; `decode-coding-string' if the string is purely ASCII.
1149 (if (eq (detect-coding-string string t
) 'undecided
)
1150 ;; string is purely ASCII
1152 (decode-coding-string string mail-parse-charset
))
1153 (string-to-multibyte string
))))
1155 (defun rfc2047-decode-address-string (string)
1156 "Decode MIME-encoded STRING and return the result.
1157 Backslashes which precede characters other than `\"' and `\\' in quoted
1158 strings are stripped."
1159 (rfc2047-decode-string string t
))
1161 (defun rfc2047-pad-base64 (string)
1162 "Pad STRING to quartets."
1163 ;; Be more liberal to accept buggy base64 strings. If
1164 ;; base64-decode-string accepts buggy strings, this function could
1165 ;; be aliased to identity.
1166 (if (= 0 (mod (length string
) 4))
1168 (when (string-match "=+$" string
)
1169 (setq string
(substring string
0 (match-beginning 0))))
1170 (case (mod (length string
) 4)
1172 (1 string
) ;; Error, don't pad it.
1173 (2 (concat string
"=="))
1174 (3 (concat string
"=")))))
1178 ;;; rfc2047.el ends here