lisp/gnus/mm-util.el

   1 ;;; mm-util.el --- Utility functions for Mule and low level things
   2
   3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4 ;;   2005, 2006 Free Software Foundation, Inc.
   5
   6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
   7 ;;      MORIOKA Tomohiko <morioka@jaist.ac.jp>
   8 ;; This file is part of GNU Emacs.
   9
  10 ;; GNU Emacs is free software; you can redistribute it and/or modify
  11 ;; it under the terms of the GNU General Public License as published by
  12 ;; the Free Software Foundation; either version 2, or (at your option)
  13 ;; any later version.
  14
  15 ;; GNU Emacs is distributed in the hope that it will be useful,
  16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 ;; GNU General Public License for more details.
  19
  20 ;; You should have received a copy of the GNU General Public License
  21 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  23 ;; Boston, MA 02110-1301, USA.
  24
  25 ;;; Commentary:
  26
  27 ;;; Code:
  28
  29 (eval-when-compile (require 'cl))
  30 (require 'mail-prsvr)
  31
  32 (eval-and-compile  ; Define an mm-FOO alias for every (FOO . FALLBACK) pair below.
  33   (mapcar
  34    (lambda (elem)
  35      (let ((nfunc (intern (format "mm-%s" (car elem)))))
  36        (if (fboundp (car elem))  ; use the native function when it is defined...
  37            (defalias nfunc (car elem))
  38          (defalias nfunc (cdr elem)))))  ; ...otherwise the fallback from the pair
  39    '((decode-coding-string . (lambda (s a) s))
  40      (encode-coding-string . (lambda (s a) s))
  41      (encode-coding-region . ignore)
  42      (coding-system-list . ignore)
  43      (decode-coding-region . ignore)
  44      (char-int . identity)
  45      (coding-system-equal . equal)
  46      (annotationp . ignore)
  47      (set-buffer-file-coding-system . ignore)
  48      (make-char
  49       . (lambda (charset int)
  50           (int-to-char int)))
  51      (read-charset
  52       . (lambda (prompt)
  53           "Return a charset."
  54           (intern
  55            (completing-read
  56             prompt
  57             (mapcar (lambda (e) (list (symbol-name (car e))))
  58                     mm-mime-mule-charset-alist)
  59             nil t))))
  60      (subst-char-in-string
  61       . (lambda (from to string &optional inplace)
  62           ;; stolen (and renamed) from nnheader.el
  63           "Replace characters in STRING from FROM to TO.
  64           Unless optional argument INPLACE is non-nil, return a new string."
  65           (let ((string (if inplace string (copy-sequence string)))
  66                 (len (length string))
  67                 (idx 0))
  68             ;; Replace all occurrences of FROM with TO.
  69             (while (< idx len)
  70               (when (= (aref string idx) from)
  71                 (aset string idx to))
  72               (setq idx (1+ idx)))
  73             string)))
  74      (string-as-unibyte . identity)
  75      (string-make-unibyte . identity)
  76      ;; string-as-multibyte often doesn't really do what you think it does.
  77      ;; Example:
  78      ;;    (aref (string-as-multibyte "\201") 0) -> 129 (aka ?\201)
  79      ;;    (aref (string-as-multibyte "\300") 0) -> 192 (aka ?\300)
  80      ;;    (aref (string-as-multibyte "\300\201") 0) -> 192 (aka ?\300)
  81      ;;    (aref (string-as-multibyte "\300\201") 1) -> 129 (aka ?\201)
  82      ;; but
  83      ;;    (aref (string-as-multibyte "\201\300") 0) -> 2240
  84      ;;    (aref (string-as-multibyte "\201\300") 1) -> <error>
  85      ;; Better use string-to-multibyte or encode-coding-string.
  86      ;; If you really need string-as-multibyte somewhere it's usually
  87      ;; because you're using the internal emacs-mule representation (maybe
  88      ;; because you're using string-as-unibyte somewhere), which is
  89      ;; generally a problem in itself.
  90      ;; Here is an approximate equivalence table to help think about it:
  91      ;; (string-as-multibyte s)   ~= (decode-coding-string s 'emacs-mule)
  92      ;; (string-to-multibyte s)   ~= (decode-coding-string s 'binary)
  93      ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
  94      (string-as-multibyte . identity)
  95      (multibyte-string-p . ignore)
  96      (insert-byte . insert-char)
  97      (multibyte-char-to-unibyte . identity))))
  98
  99 (eval-and-compile  ; Define `mm-replace-in-string' from whatever this Emacs offers.
 100   (cond
 101    ((fboundp 'replace-in-string)  ; a native `replace-in-string' is aliased directly
 102     (defalias 'mm-replace-in-string 'replace-in-string))
 103    ((fboundp 'replace-regexp-in-string)
 104     (defun mm-replace-in-string (string regexp newtext &optional literal)
 105       "Replace all matches for REGEXP with NEWTEXT in STRING.
 106 If LITERAL is non-nil, insert NEWTEXT literally.  Return a new
 107 string containing the replacements.
 108
 109 This is a compatibility function for different Emacsen."
 110       (replace-regexp-in-string regexp newtext string nil literal)))  ; same job, different argument order
 111    (t  ; neither function exists: replace match by match in a loop
 112     (defun mm-replace-in-string (string regexp newtext &optional literal)
 113       "Replace all matches for REGEXP with NEWTEXT in STRING.
 114 If LITERAL is non-nil, insert NEWTEXT literally.  Return a new
 115 string containing the replacements.
 116
 117 This is a compatibility function for different Emacsen."
 118       (let ((start 0) tail)
 119         (while (string-match regexp string start)
 120           (setq tail (- (length string) (match-end 0)))  ; chars remaining after this match
 121           (setq string (replace-match newtext nil literal string))
 122           (setq start (- (length string) tail))))  ; resume right after the inserted text
 123       string))))
 124
 125 (defalias 'mm-string-to-multibyte  ; implementation is chosen once, when this form runs
 126   (cond
 127    ((featurep 'xemacs)  ; XEmacs: return the string unchanged
 128     'identity)
 129    ((fboundp 'string-to-multibyte)
 130     'string-to-multibyte)
 131    (t  ; fallback: convert one character at a time and concatenate
 132     (lambda (string)
 133       "Return a multibyte string with the same individual chars as string."
 134       (mapconcat
 135        (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
 136        string "")))))
 137
 138 (eval-and-compile  ; Alias `mm-char-or-char-int-p' to the first predicate that exists.
 139   (defalias 'mm-char-or-char-int-p
 140     (cond
 141      ((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
 142      ((fboundp 'char-valid-p) 'char-valid-p)
 143      (t 'identity))))  ; no predicate available: any non-nil argument passes
 144
 145 ;; Fixme:  This seems always to be used to read a MIME charset, so it
 146 ;; should be re-named and fixed (in Emacs) to offer completion only on
 147 ;; proper charset names (base coding systems which have a
 148 ;; mime-charset defined).  XEmacs doesn't believe in mime-charset;
 149 ;; test with
 150 ;;   `(or (coding-system-get 'iso-8859-1 'mime-charset)
 151 ;;        (coding-system-get 'iso-8859-1 :mime-charset))'
 152 ;; Actually, there should be an `mm-coding-system-mime-charset'.
 153 (eval-and-compile  ; Alias `mm-read-coding-system' to a suitable prompting function.
 154   (defalias 'mm-read-coding-system
 155     (cond
 156      ((fboundp 'read-coding-system)
 157       (if (and (featurep 'xemacs)
 158                (<= (string-to-number emacs-version) 21.1))  ; these get a one-argument wrapper
 159           (lambda (prompt &optional default-coding-system)
 160             (read-coding-system prompt))  ; DEFAULT-CODING-SYSTEM is dropped here
 161         'read-coding-system))
 162      (t (lambda (prompt &optional default-coding-system)  ; DEFAULT-CODING-SYSTEM is unused
 163           "Prompt the user for a coding system."
 164           (completing-read  ; NOTE(review): yields a string, not a symbol -- confirm callers cope
 165            prompt (mapcar (lambda (s) (list (symbol-name (car s))))
 166                           mm-mime-mule-charset-alist)))))))
 167
 168 (defvar mm-coding-system-list nil "Cache for `mm-get-coding-system-list'.")
 169 (defun mm-get-coding-system-list ()
 170   "Return the coding system list, computing and caching it when still unset."
 171   (if mm-coding-system-list mm-coding-system-list
 172     (setq mm-coding-system-list (mm-coding-system-list))))
 173
 174 (defun mm-coding-system-p (cs)
 175   "Return non-nil if CS names a coding system that is available.
 176 In Emacs the value is CS itself.  In XEmacs, CS may also be a coding
 177 system object, and the value is a coding system object.
 178 Return nil if CS is not available."
 179   (cond ((fboundp 'find-coding-system)
 180          (and cs (find-coding-system cs)))
 181         ((fboundp 'coding-system-p)
 182          (and (coding-system-p cs)
 183               cs))
 184         ;; no-MULE XEmacs:
 185         (t (car (memq cs (mm-get-coding-system-list))))))
 186
 187 (defun mm-codepage-setup (number &optional alias)
 188   "Create a coding system cpNUMBER.
 189 The coding system is created using `codepage-setup'.  If ALIAS is
 190 non-nil, an alias is created and added to
 191 `mm-charset-synonym-alist'.  If ALIAS is a string, it's used as
 192 the alias.  Else windows-NUMBER is used."
 193   (interactive
 194    (let ((completion-ignore-case t)
 195          (candidates (cp-supported-codepages)))
 196      (list (completing-read "Setup DOS Codepage: (default 437) " candidates
 197                             nil t nil nil "437"))))  ; interactively NUMBER is a string
 198   (when alias  ; turn ALIAS into the symbol that will be registered
 199     (setq alias (if (stringp alias)
 200                     (intern alias)
 201                   (intern (format "windows-%s" number)))))
 202   (let* ((cp (intern (format "cp%s" number))))  ; "%s" takes an integer or a string NUMBER
 203     (unless (mm-coding-system-p cp)  ; only run the setup when cpNUMBER is missing
 204       (codepage-setup number))
 205     (when (and alias
 206                ;; Don't add alias if setup of cp failed.
 207                (mm-coding-system-p cp))
 208       (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
 209
 210 (defvar mm-charset-synonym-alist  ; elements are (SYNONYM . CODING-SYSTEM)
 211   `(
 212     ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
 213     ,@(unless (mm-coding-system-p 'x-ctext)
 214         '((x-ctext . ctext)))
 215     ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_ in 8
 216     ;; positions!
 217     ,@(unless (mm-coding-system-p 'iso-8859-15)
 218         '((iso-8859-15 . iso-8859-1)))
 219     ;; BIG-5HKSCS is similar to, but different than, BIG-5.
 220     ,@(unless (mm-coding-system-p 'big5-hkscs)
 221         '((big5-hkscs . big5)))
 222     ;; A Microsoft misunderstanding.
 223     ,@(when (and (not (mm-coding-system-p 'unicode))
 224                  (mm-coding-system-p 'utf-16-le))
 225         '((unicode . utf-16-le)))
 226     ;; A Microsoft misunderstanding; map to cp949 when available, else euc-kr.
 227     ,@(unless (mm-coding-system-p 'ks_c_5601-1987)
 228         (if (mm-coding-system-p 'cp949)
 229             '((ks_c_5601-1987 . cp949))
 230           '((ks_c_5601-1987 . euc-kr))))
 231     ;; Windows-31J is Windows Codepage 932.
 232     ,@(when (and (not (mm-coding-system-p 'windows-31j))
 233                  (mm-coding-system-p 'cp932))
 234         '((windows-31j . cp932)))
 235     )
 236   "A mapping from unknown or invalid charset names to the real charset names.
 237
 238 See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
 239
 240 (defcustom mm-codepage-iso-8859-list  ; read by `mm-setup-codepage-iso-8859'
 241   (list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
 242         ;; Outlook users in the Czech Republic.  Use this to allow reading of
 243         ;; their e-mails.  cp1250 should be defined by M-x codepage-setup
 244         ;; (Emacs 21).
 245         '(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West
 246                     ;; Europe).  See also `gnus-article-dumbquotes-map'.
 247         '(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
 248         '(1255 . 8));; Windows-1255 is a superset of iso-8859-8 (Hebrew).
 249   "A list of Windows codepage numbers and iso-8859 charset numbers.
 250
 251 If an element is a number corresponding to a supported windows
 252 codepage, appropriate entries to `mm-charset-synonym-alist' are
 253 added by `mm-setup-codepage-iso-8859'.  An element may also be a
 254 cons cell where the car is a codepage number and the cdr is the
 255 corresponding number of an iso-8859 charset."
 256   :type '(list (set :inline t
 257                     (const 1250 :tag "Central and East European")
 258                     (const (1252 . 1) :tag "West European")
 259                     (const (1254 . 9) :tag "Turkish")
 260                     (const (1255 . 8) :tag "Hebrew"))
 261                (repeat :inline t
 262                        :tag "Other options"
 263                        (choice
 264                         (integer :tag "Windows codepage number")
 265                         (cons (integer :tag "Windows codepage number")
 266                               (integer :tag "iso-8859 charset  number")))))
 267   :version "22.1" ;; Gnus 5.10.9
 268   :group 'mime)
 269
 270 (defcustom mm-codepage-ibm-list  ; read by `mm-setup-codepage-ibm'
 271   (list 437 ;; (US etc.)
 272         860 ;; (Portugal)
 273         861 ;; (Iceland)
 274         862 ;; (Israel)
 275         863 ;; (Canadian French)
 276         865 ;; (Nordic)
 277         852 ;; (no description given)
 278         850 ;; (Latin 1)
 279         855 ;; (Cyrillic)
 280         866 ;; (Cyrillic - Russian)
 281         857 ;; (Turkish)
 282         864 ;; (Arabic)
 283         869 ;; (Greek)
 284         874);; (Thai)
 285   ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
 286   ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
 287   "List of IBM codepage numbers.
 288
 289 The codepage mappings slightly differ between IBM and other vendors.
 290 See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".
 291
 292 If an element is a number corresponding to a supported windows
 293 codepage, appropriate entries to `mm-charset-synonym-alist' are
 294 added by `mm-setup-codepage-ibm'."
 295   :type '(list (set :inline t
 296                     (const 437 :tag "US etc.")
 297                     (const 860 :tag "Portugal")
 298                     (const 861 :tag "Iceland")
 299                     (const 862 :tag "Israel")
 300                     (const 863 :tag "Canadian French")
 301                     (const 865 :tag "Nordic")
 302                     (const 852)
 303                     (const 850 :tag "Latin 1")
 304                     (const 855 :tag "Cyrillic")
 305                     (const 866 :tag "Cyrillic - Russian")
 306                     (const 857 :tag "Turkish")
 307                     (const 864 :tag "Arabic")
 308                     (const 869 :tag "Greek")
 309                     (const 874 :tag "Thai"))
 310                (repeat :inline t
 311                        :tag "Other options"
 312                        (integer :tag "Codepage number")))
 313   :version "22.1" ;; Gnus 5.10.9
 314   :group 'mime)
 315
 316 (defun mm-setup-codepage-iso-8859 (&optional list)
 317   "Add appropriate entries to `mm-charset-synonym-alist'.
 318 LIST defaults to `mm-codepage-iso-8859-list'; unmappable elements are skipped."
 319   (unless list
 320     (setq list mm-codepage-iso-8859-list))
 321   (dolist (i list)  ; I is either CODEPAGE or (CODEPAGE . ISO-8859-NUMBER)
 322     (let (cp windows iso)
 323       (if (consp i)
 324           (setq cp (intern (format "cp%d" (car i)))
 325                 windows (intern (format "windows-%d" (car i)))
 326                 iso (intern (format "iso-8859-%d" (cdr i))))
 327         (setq cp (intern (format "cp%d" i))  ; bare number: ISO stays nil
 328               windows (intern (format "windows-%d" i))))
 329       (unless (mm-coding-system-p windows)  ; windows-N already usable: nothing to add
 330         (cond ((mm-coding-system-p cp)  ; prefer cpN; else iso-8859-M; never a nil target
 331                (add-to-list 'mm-charset-synonym-alist (cons windows cp)))
 332               (iso (add-to-list 'mm-charset-synonym-alist (cons windows iso))))))))
 333
 334 (defun mm-setup-codepage-ibm (&optional list)
 335   "For each codepage number in LIST, map ibmNUMBER to cpNUMBER.
 336 The pair is added to `mm-charset-synonym-alist' only when cpNUMBER is
 337 an available coding system and ibmNUMBER is not.  When LIST is nil,
 338 `mm-codepage-ibm-list' is used."
 339   (dolist (codepage (or list mm-codepage-ibm-list))
 340     (let ((ibm-name (intern (format "ibm%d" codepage)))
 341           (cp-name (intern (format "cp%d" codepage))))
 342       (or (mm-coding-system-p ibm-name)
 343           (not (mm-coding-system-p cp-name))
 344           (add-to-list 'mm-charset-synonym-alist (cons ibm-name cp-name))))))
 345
 346 ;; Initialize: extend `mm-charset-synonym-alist' from both codepage lists.
 347 (mm-setup-codepage-iso-8859)
 348 (mm-setup-codepage-ibm)
 349
 350 (defcustom mm-charset-override-alist  ; elements are (FROM-CHARSET . TO-CHARSET)
 351   `((iso-8859-1 . windows-1252))
 352   "A mapping from undesired charset names to their replacement.
 353
 354 You may add pairs like (iso-8859-1 . windows-1252) here,
 355 i.e. treat iso-8859-1 as windows-1252.  windows-1252 is a
 356 superset of iso-8859-1."
 357   :type '(list (set :inline t
 358                     (const (iso-8859-1 . windows-1252))
 359                     (const (undecided  . windows-1252)))
 360                (repeat :inline t
 361                        :tag "Other options"
 362                        (cons (symbol :tag "From charset")
 363                              (symbol :tag "To charset"))))
 364   :version "22.1" ;; Gnus 5.10.9
 365   :group 'mime)
 366
 367 (defcustom mm-charset-eval-alist
 368   (if (featurep 'xemacs)
 369       nil ;; I don't know what would be useful for XEmacs.
 370     '(;; Emacs 21 offers 1250 1251 1253 1257.  Emacs 22 provides autoloads for
 371       ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing).
 372       (windows-1250 . (mm-codepage-setup 1250 t))
 373       (windows-1251 . (mm-codepage-setup 1251 t))
 374       (windows-1253 . (mm-codepage-setup 1253 t))
 375       (windows-1257 . (mm-codepage-setup 1257 t))))
 376   "An alist of (CHARSET . FORM) pairs.
 377 If an article is encoded in an unknown CHARSET, FORM is
 378 evaluated.  This allows loading additional libraries providing
 379 charsets on demand.  If supported by your Emacs version, you
 380 could use `autoload-coding-system' here."
 381   :version "22.1" ;; Gnus 5.10.9
 382   :type '(list (set :inline t
 383                     (const (windows-1250 . (mm-codepage-setup 1250 t)))
 384                     (const (windows-1251 . (mm-codepage-setup 1251 t)))
 385                     (const (windows-1253 . (mm-codepage-setup 1253 t)))
 386                     (const (windows-1257 . (mm-codepage-setup 1257 t)))
 387                     (const (cp850 . (mm-codepage-setup 850 nil))))
 388                (repeat :inline t
 389                        :tag "Other options"
 390                        (cons (symbol :tag "charset")
 391                              (sexp :tag "form"))))  ; FORM is any expression, not just a symbol
 392   :group 'mime)
 393
 394 (defvar mm-binary-coding-system  ; first available of `binary', `no-conversion'
 395   (cond
 396    ((mm-coding-system-p 'binary) 'binary)
 397    ((mm-coding-system-p 'no-conversion) 'no-conversion)
 398    (t nil))  ; neither coding system is available
 399   "100% binary coding system.")
 400
 401 (defvar mm-text-coding-system
 402   (or (if (memq system-type '(windows-nt ms-dos ms-windows))  ; these get the -dos variant
 403           (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos)
 404         (and (mm-coding-system-p 'raw-text) 'raw-text))
 405       mm-binary-coding-system)  ; fallback when the raw-text variant is unavailable
 406   "Text-safe coding system (For removing ^M).")
 407
 408 (defvar mm-text-coding-system-for-write nil  ; nil by default
 409   "Text coding system for write.")
 410
 411 (defvar mm-auto-save-coding-system  ; tried in order: utf-8-emacs, emacs-mule, escape-quoted
 412   (cond
 413    ((mm-coding-system-p 'utf-8-emacs)   ; Mule 7
 414     (if (memq system-type '(windows-nt ms-dos ms-windows))
 415         (if (mm-coding-system-p 'utf-8-emacs-dos)  ; -dos variant, else binary
 416             'utf-8-emacs-dos mm-binary-coding-system)
 417       'utf-8-emacs))
 418    ((mm-coding-system-p 'emacs-mule)
 419     (if (memq system-type '(windows-nt ms-dos ms-windows))
 420         (if (mm-coding-system-p 'emacs-mule-dos)  ; -dos variant, else binary
 421             'emacs-mule-dos mm-binary-coding-system)
 422       'emacs-mule))
 423    ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
 424    (t mm-binary-coding-system))  ; nothing better is available
 425   "Coding system of auto save file.")
 426
 427 (defvar mm-universal-coding-system mm-auto-save-coding-system  ; same initial value
 428   "The universal coding system.")
 429
 430 ;; Fixme: some of the cars here aren't valid MIME charsets.  That
 431 ;; should only matter with XEmacs, though.
 432 (defvar mm-mime-mule-charset-alist  ; elements are (MIME-CHARSET MULE-CHARSET...)
 433   `((us-ascii ascii)
 434     (iso-8859-1 latin-iso8859-1)
 435     (iso-8859-2 latin-iso8859-2)
 436     (iso-8859-3 latin-iso8859-3)
 437     (iso-8859-4 latin-iso8859-4)
 438     (iso-8859-5 cyrillic-iso8859-5)
 439     ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
 440     ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
 441     ;; charset is koi8-r, not iso-8859-5.
 442     (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
 443     (iso-8859-6 arabic-iso8859-6)
 444     (iso-8859-7 greek-iso8859-7)
 445     (iso-8859-8 hebrew-iso8859-8)
 446     (iso-8859-9 latin-iso8859-9)
 447     (iso-8859-14 latin-iso8859-14)
 448     (iso-8859-15 latin-iso8859-15)
 449     (viscii vietnamese-viscii-lower)
 450     (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
 451     (euc-kr korean-ksc5601)
 452     (gb2312 chinese-gb2312)
 453     (big5 chinese-big5-1 chinese-big5-2)
 454     (tibetan tibetan)
 455     (thai-tis620 thai-tis620)
 456     (windows-1251 cyrillic-iso8859-5)
 457     (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
 458     (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
 459                    latin-jisx0201 japanese-jisx0208-1978
 460                    chinese-gb2312 japanese-jisx0208
 461                    korean-ksc5601 japanese-jisx0212)
 462     (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
 463                     latin-jisx0201 japanese-jisx0208-1978
 464                     chinese-gb2312 japanese-jisx0208
 465                     korean-ksc5601 japanese-jisx0212
 466                     chinese-cns11643-1 chinese-cns11643-2)
 467     (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2  ; NOTE(review): duplicate key; `assq' only ever finds the entry above -- intended name?
 468                     cyrillic-iso8859-5 greek-iso8859-7
 469                     latin-jisx0201 japanese-jisx0208-1978
 470                     chinese-gb2312 japanese-jisx0208
 471                     korean-ksc5601 japanese-jisx0212
 472                     chinese-cns11643-1 chinese-cns11643-2
 473                     chinese-cns11643-3 chinese-cns11643-4
 474                     chinese-cns11643-5 chinese-cns11643-6
 475                     chinese-cns11643-7)
 476     (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
 477                    japanese-jisx0213-1 japanese-jisx0213-2)
 478     (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
 479     ,(cond ((fboundp 'unicode-precedence-list)  ; the utf-8 entry is computed at load time
 480             (cons 'utf-8 (delq 'ascii (mapcar 'charset-name
 481                                               (unicode-precedence-list)))))
 482            ((or (not (fboundp 'charsetp)) ;; non-Mule case
 483                 (charsetp 'unicode-a)
 484                 (not (mm-coding-system-p 'mule-utf-8)))
 485             '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
 486            (t ;; If we have utf-8 we're in Mule 5+.
 487             (append '(utf-8)
 488                     (delete 'ascii
 489                             (coding-system-get 'mule-utf-8 'safe-charsets))))))
 490   "Alist of MIME-charset/MULE-charsets.")
 491
 492 (defun mm-enrich-utf-8-by-mule-ucs ()
 493   "Make the `utf-8' MIME charset usable by the Mule-UCS package.
 494 This function will run when the `un-define' module is loaded under
 495 XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
 496 with Mule charsets.  It is completely useless for Emacs."
 497   (when (boundp 'unicode-basic-translation-charset-order-list)
 498     (condition-case nil  ; any error raised below is silently ignored
 499         (let ((val (delq
 500                     'ascii
 501                     (copy-sequence  ; copy first, because `delq' is destructive
 502                      (symbol-value
 503                       'unicode-basic-translation-charset-order-list))))
 504               (elem (assq 'utf-8 mm-mime-mule-charset-alist)))
 505           (if elem
 506               (setcdr elem val)  ; overwrite the existing utf-8 entry in place
 507             (setq mm-mime-mule-charset-alist  ; no utf-8 entry yet: append one
 508                   (nconc mm-mime-mule-charset-alist
 509                          (list (cons 'utf-8 val))))))
 510       (error))))
 511
 512 ;; Correct by construction, but should be unnecessary for Emacs:
 513 (if (featurep 'xemacs)
 514     (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs))
 515   (when (and (fboundp 'coding-system-list)  ; rebuild the alist from the coding systems
 516              (fboundp 'sort-coding-systems))
 517     (let ((css (sort-coding-systems (coding-system-list 'base-only)))
 518           cs mime mule alist)
 519       (while css
 520         (setq cs (pop css)
 521               mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
 522                        (coding-system-get cs 'mime-charset)))
 523         (when (and mime
 524                    (not (eq t (setq mule  ; skip systems whose safe-charsets is t
 525                                     (coding-system-get cs 'safe-charsets))))
 526                    (not (assq mime alist)))  ; keep only the first system per MIME charset
 527           (push (cons mime (delq 'ascii mule)) alist)))
 528       (setq mm-mime-mule-charset-alist (nreverse alist)))))  ; replaces the previous value
 529
 530 (defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2)  ; both hacks are on by default
 531   "A list of special charsets.
 532 Valid elements include:
 533 `iso-8859-15'    convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
 534 `iso-2022-jp-2'  convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists."
 535 )
 536
 537 (defvar mm-iso-8859-15-compatible  ; elements are (CODING-SYSTEM INCONVERTIBLE-BYTES)
 538   '((iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
 539     (iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
 540   "ISO-8859-15 exchangeable coding systems and inconvertible characters.")
 541
 542 (defvar mm-iso-8859-x-to-15-table  ; nil unless iso-8859-15 is available
 543   (and (fboundp 'coding-system-p)
 544        (mm-coding-system-p 'iso-8859-15)
 545        (mapcar  ; one entry per element of `mm-iso-8859-15-compatible'
 546         (lambda (cs)
 547           (if (mm-coding-system-p (car cs))
 548               (let ((c (string-to-char
 549                         (decode-coding-string "\341" (car cs)))))
 550                 (cons (char-charset c)  ; entry: (MULE-CHARSET OFFSET . INCONVERTIBLE-CHARS)
 551                       (cons
 552                        (- (string-to-char  ; OFFSET: \341 decoded as iso-8859-15 minus as CS
 553                            (decode-coding-string "\341" 'iso-8859-15)) c)
 554                        (string-to-list (decode-coding-string (car (cdr cs))
 555                                                              (car cs))))))
 556             '(gnus-charset 0)))  ; placeholder entry when CS is not available
 557         mm-iso-8859-15-compatible))
 558   "A table of the difference character between ISO-8859-X and ISO-8859-15.")
 559
 560 (defcustom mm-coding-system-priorities  ; default is nil unless the environment is Japanese
 561   (if (boundp 'current-language-environment)
 562       (let ((lang (symbol-value 'current-language-environment)))
 563         (cond ((string= lang "Japanese")
 564                ;; Japanese users prefer iso-2022-jp to euc-japan or
 565                ;; shift_jis, however iso-8859-1 should be used when
 566                ;; there are only ASCII text and Latin-1 characters.
 567                '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8)))))
 568   "Preferred coding systems for encoding outgoing messages.
 569
 570 More than one suitable coding system may be found for some text.
 571 By default, the coding system with the highest priority is used
 572 to encode outgoing messages (see `sort-coding-systems').  If this
 573 variable is set, it overrides the default priority."
 574   :version "21.2"
 575   :type '(repeat (symbol :tag "Coding system"))
 576   :group 'mime)
 577
 578 ;; ??
 579 (defvar mm-use-find-coding-systems-region
 580   (fboundp 'find-coding-systems-region)  ; on by default exactly when the function exists
 581   "Use `find-coding-systems-region' to find proper coding systems.
 582
 583 Setting it to nil is useful on Emacsen supporting Unicode if sending
 584 mail with multiple parts is preferred to sending a Unicode one.")
 585
 586 ;;; Internal variables:
 587
 588 ;;; Functions:
 589
 590 (defun mm-mule-charset-to-mime-charset (charset)
 591   "Return the MIME charset corresponding to the given Mule CHARSET."
 592   (if (and (fboundp 'find-coding-systems-for-charsets)  ; preferred: ask the coding systems
 593            (fboundp 'sort-coding-systems))
 594       (let ((css (sort (sort-coding-systems
 595                         (find-coding-systems-for-charsets (list charset)))
 596                        'mm-sort-coding-systems-predicate))
 597             cs mime)
 598         (while (and (not mime)  ; stop at the first coding system with a MIME charset
 599                     css)
 600           (when (setq cs (pop css))
 601             (setq mime (or (coding-system-get cs :mime-charset)
 602                            (coding-system-get cs 'mime-charset)))))
 603         mime)
 604     (let ((alist (mapcar (lambda (cs)  ; fallback: search `mm-mime-mule-charset-alist', sorted
 605                            (assq cs mm-mime-mule-charset-alist))
 606                          (sort (mapcar 'car mm-mime-mule-charset-alist)
 607                                'mm-sort-coding-systems-predicate)))
 608           out)
 609       (while alist
 610         (when (memq charset (cdar alist))
 611           (setq out (caar alist)
 612                 alist nil))  ; found: clearing ALIST ends the loop
 613         (pop alist))
 614       out)))
 615
 616 (defun mm-charset-to-coding-system (charset &optional lbt
 617                                             allow-override)
 618   "Return coding-system corresponding to CHARSET.
 619 CHARSET is a symbol or string naming a MIME charset; if it is nil, or no
 620 coding system can be found for it, return nil.  If optional argument LBT
 621 (`unix', `dos' or `mac') is given, it is used as the line break code type.
 622
 623 If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
 624 map undesired charset names to their replacement.  This should
 625 only be used for decoding, not for encoding."
 626   ;; ALLOW-OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
 627   (when (stringp charset)
 628     (setq charset (intern (downcase charset))))
 629   (when (and charset lbt)  ; never turn a nil CHARSET into the symbol `nil-LBT'
 630     (setq charset (intern (format "%s-%s" charset lbt))))
 631   (cond
 632    ((null charset)
 633     charset)
 634    ;; Running in a non-MULE environment.
 635    ((or (null (mm-get-coding-system-list))
 636         (not (fboundp 'coding-system-get)))
 637     charset)
 638    ;; Check override list quite early.  Should only be used for decoding, not
 639    ;; for encoding!
 640    ((and allow-override
 641          (let ((cs (cdr (assq charset mm-charset-override-alist))))
 642            (and cs (mm-coding-system-p cs) cs))))
 643    ;; ascii
 644    ((eq charset 'us-ascii)
 645     'ascii)
 646    ;; Check to see whether we can handle this charset.  (This depends
 647    ;; on there being some coding system matching each `mime-charset'
 648    ;; property defined, as there should be.)
 649    ((and (mm-coding-system-p charset)
 650 ;;; Doing this would potentially weed out incorrect charsets.
 651 ;;;      charset
 652 ;;;      (eq charset (coding-system-get charset 'mime-charset))
 653          )
 654     charset)
 655    ;; Eval expressions from `mm-charset-eval-alist'
 656    ((let* ((el (assq charset mm-charset-eval-alist))
 657            (cs (car el))
 658            (form (cdr el)))
 659       (and cs
 660            form
 661            (progn
 662              ;; Evaluate FORM for its side effects, ignoring any error.
 663              (condition-case nil (eval form) (error nil))
 664              ;; Go on (and announce success) only if CS really exists now.
 665              (mm-coding-system-p cs))
 666            (message "Added charset `%s' via `mm-charset-eval-alist'" cs)
 667            cs)))
 668    ;; Translate invalid charsets.
 669    ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
 670       (and cs
 671            (mm-coding-system-p cs)
 672            ;; (message
 673            ;;  "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
 674            ;;  cs charset)
 675            cs)))
 676    ;; Last resort: search the coding system list for entries which
 677    ;; have the right mime-charset in case the canonical name isn't
 678    ;; defined (though it should be).
 679    ((let (cs)
 680       ;; mm-get-coding-system-list returns a list of cs without lbt.
 681       ;; Do we need -lbt?
 682       (dolist (c (mm-get-coding-system-list))
 683         (if (and (null cs)
 684                  (eq charset (or (coding-system-get c :mime-charset)
 685                                  (coding-system-get c 'mime-charset))))
 686             (setq cs c)))
 687       (unless cs
 688         ;; Warn the user about unknown charset:
 689         (if (fboundp 'gnus-message)
 690             (gnus-message 7 "Unknown charset: %s" charset)
 691           (message "Unknown charset: %s" charset)))
 692       cs))))
 693
 694 (defsubst mm-replace-chars-in-string (string from to)
 695   (mm-subst-char-in-string from to string))
 696
 697 (eval-and-compile
 698   (defvar mm-emacs-mule (and (not (featurep 'xemacs))
 699                              (boundp 'default-enable-multibyte-characters)
 700                              default-enable-multibyte-characters
 701                              (fboundp 'set-buffer-multibyte))
 702     "True in Emacs with Mule.")
 703
 704   (if mm-emacs-mule
 705       (defun mm-enable-multibyte ()
 706         "Set the multibyte flag of the current buffer.
 707 Only do this if the default value of `enable-multibyte-characters' is
 708 non-nil.  This is a no-op in XEmacs."
 709         (set-buffer-multibyte 'to))
 710     (defalias 'mm-enable-multibyte 'ignore))
 711
 712   (if mm-emacs-mule
 713       (defun mm-disable-multibyte ()
 714         "Unset the multibyte flag of in the current buffer.
 715 This is a no-op in XEmacs."
 716         (set-buffer-multibyte nil))
 717     (defalias 'mm-disable-multibyte 'ignore)))
 718
 719 (defun mm-preferred-coding-system (charset)
 720   ;; A typo in some Emacs versions.
 721   (or (get-charset-property charset 'preferred-coding-system)
 722       (get-charset-property charset 'prefered-coding-system)))
 723
 724 ;; Mule charsets shouldn't be used.
 725 (defsubst mm-guess-charset ()
 726   "Guess Mule charset from the language environment."
 727   (or
 728    mail-parse-mule-charset ;; cached mule-charset
 729    (progn
 730      (setq mail-parse-mule-charset
 731            (and (boundp 'current-language-environment)
 732                 (car (last
 733                       (assq 'charset
 734                             (assoc current-language-environment
 735                                    language-info-alist))))))
 736      (if (or (not mail-parse-mule-charset)
 737              (eq mail-parse-mule-charset 'ascii))
 738          (setq mail-parse-mule-charset
 739                (or (car (last (assq mail-parse-charset
 740                                     mm-mime-mule-charset-alist)))
 741                    ;; default
 742                    'latin-iso8859-1)))
 743      mail-parse-mule-charset)))
 744
 745 (defun mm-charset-after (&optional pos)
 746   "Return charset of a character in current buffer at position POS.
 747 If POS is nil, it defauls to the current point.
 748 If POS is out of range, the value is nil.
 749 If the charset is `composition', return the actual one."
 750   (let ((char (char-after pos)) charset)
 751     (if (< (mm-char-int char) 128)
 752         (setq charset 'ascii)
 753       ;; charset-after is fake in some Emacsen.
 754       (setq charset (and (fboundp 'char-charset) (char-charset char)))
 755       (if (eq charset 'composition)     ; Mule 4
 756           (let ((p (or pos (point))))
 757             (cadr (find-charset-region p (1+ p))))
 758         (if (and charset (not (memq charset '(ascii eight-bit-control
 759                                                     eight-bit-graphic))))
 760             charset
 761           (mm-guess-charset))))))
 762
 763 (defun mm-mime-charset (charset)
 764   "Return the MIME charset corresponding to the given Mule CHARSET."
 765   (if (eq charset 'unknown)
 766       (error "The message contains non-printable characters, please use attachment"))
 767   (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
 768       ;; This exists in Emacs 20.
 769       (or
 770        (and (mm-preferred-coding-system charset)
 771             (or (coding-system-get
 772                  (mm-preferred-coding-system charset) :mime-charset)
 773                 (coding-system-get
 774                  (mm-preferred-coding-system charset) 'mime-charset)))
 775        (and (eq charset 'ascii)
 776             'us-ascii)
 777        (mm-preferred-coding-system charset)
 778        (mm-mule-charset-to-mime-charset charset))
 779     ;; This is for XEmacs.
 780     (mm-mule-charset-to-mime-charset charset)))
 781
 782 (if (fboundp 'delete-dups)
 783     (defalias 'mm-delete-duplicates 'delete-dups)
 784   (defun mm-delete-duplicates (list)
 785     "Destructively remove `equal' duplicates from LIST.
 786 Store the result in LIST and return it.  LIST must be a proper list.
 787 Of several `equal' occurrences of an element in LIST, the first
 788 one is kept.
 789
 790 This is a compatibility function for Emacsen without `delete-dups'."
 791     ;; Code from `subr.el' in Emacs 22:
 792     (let ((tail list))
 793       (while tail
 794         (setcdr tail (delete (car tail) (cdr tail)))
 795         (setq tail (cdr tail))))
 796     list))
 797
 798 ;; Fixme:  This is used in places when it should be testing the
 799 ;; default multibyteness.  See mm-default-multibyte-p.
 800 (eval-and-compile
 801   (if (and (not (featurep 'xemacs))
 802            (boundp 'enable-multibyte-characters))
 803       (defun mm-multibyte-p ()
 804         "Non-nil if multibyte is enabled in the current buffer."
 805         enable-multibyte-characters)
 806     (defun mm-multibyte-p () (featurep 'mule))))
 807
 808 (defun mm-default-multibyte-p ()
 809   "Return non-nil if the session is multibyte.
 810 This affects whether coding conversion should be attempted generally."
 811   (if (featurep 'mule)
 812       (if (boundp 'default-enable-multibyte-characters)
 813           default-enable-multibyte-characters
 814         t)))
 815
 816 (defun mm-iso-8859-x-to-15-region (&optional b e)
 817   (if (fboundp 'char-charset)
 818       (let (charset item c inconvertible)
 819         (save-restriction
 820           (if e (narrow-to-region b e))
 821           (goto-char (point-min))
 822           (skip-chars-forward "\0-\177")
 823           (while (not (eobp))
 824             (cond
 825              ((not (setq item (assq (char-charset (setq c (char-after)))
 826                                     mm-iso-8859-x-to-15-table)))
 827               (forward-char))
 828              ((memq c (cdr (cdr item)))
 829               (setq inconvertible t)
 830               (forward-char))
 831              (t
 832               (insert-before-markers (prog1 (+ c (car (cdr item)))
 833                                        (delete-char 1)))))
 834             (skip-chars-forward "\0-\177")))
 835         (not inconvertible))))
 836
 837 (defun mm-sort-coding-systems-predicate (a b)
 838   (let ((priorities
 839          (mapcar (lambda (cs)
 840                    ;; Note: invalid entries are dropped silently
 841                    (and (setq cs (mm-coding-system-p cs))
 842                         (coding-system-base cs)))
 843                  mm-coding-system-priorities)))
 844     (and (setq a (mm-coding-system-p a))
 845          (if (setq b (mm-coding-system-p b))
 846              (> (length (memq (coding-system-base a) priorities))
 847                 (length (memq (coding-system-base b) priorities)))
 848            t))))
 849
;; Compile-time declarations for the latin-unity functions and
;; variables referenced by `mm-xemacs-find-mime-charset-1', so that the
;; byte compiler knows about them without loading the library.
(eval-when-compile
  (autoload 'latin-unity-massage-name "latin-unity")
  (autoload 'latin-unity-maybe-remap "latin-unity")
  (autoload 'latin-unity-representations-feasible-region "latin-unity")
  (autoload 'latin-unity-representations-present-region "latin-unity")
  (defvar latin-unity-coding-systems)
  (defvar latin-unity-ucs-list))
 857
 858 (defun mm-xemacs-find-mime-charset-1 (begin end)
 859   "Determine which MIME charset to use to send region as message.
 860 This uses the XEmacs-specific latin-unity package to better handle the
 861 case where identical characters from diverse ISO-8859-? character sets
 862 can be encoded using a single one of the corresponding coding systems.
 863
 864 It treats `mm-coding-system-priorities' as the list of preferred
 865 coding systems; a useful example setting for this list in Western
 866 Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default
 867 to the very standard Latin 1 coding system, and only move to coding
 868 systems that are less supported as is necessary to encode the
 869 characters that exist in the buffer.
 870
 871 Latin Unity doesn't know about those non-ASCII Roman characters that
 872 are available in various East Asian character sets.  As such, its
 873 behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a
 874 buffer and it can otherwise be encoded as Latin 1, won't be ideal.
 875 But this is very much a corner case, so don't worry about it."
 876   (let ((systems mm-coding-system-priorities) csets psets curset)
 877
 878     ;; Load the Latin Unity library, if available.
 879     (when (and (not (featurep 'latin-unity)) (locate-library "latin-unity"))
 880       (ignore-errors (require 'latin-unity)))
 881
 882     ;; Now, can we use it?
 883     (if (featurep 'latin-unity)
 884         (progn
 885           (setq csets (latin-unity-representations-feasible-region begin end)
 886                 psets (latin-unity-representations-present-region begin end))
 887
 888           (catch 'done
 889
 890             ;; Pass back the first coding system in the preferred list
 891             ;; that can encode the whole region.
 892             (dolist (curset systems)
 893               (setq curset (latin-unity-massage-name 'buffer-default curset))
 894
 895               ;; If the coding system is a universal coding system, then
 896               ;; it can certainly encode all the characters in the region.
 897               (if (memq curset latin-unity-ucs-list)
 898                   (throw 'done (list curset)))
 899
 900               ;; If a coding system isn't universal, and isn't in
 901               ;; the list that latin unity knows about, we can't
 902               ;; decide whether to use it here. Leave that until later
 903               ;; in `mm-find-mime-charset-region' function, whence we
 904               ;; have been called.
 905               (unless (memq curset latin-unity-coding-systems)
 906                 (throw 'done nil))
 907
 908               ;; Right, we know about this coding system, and it may
 909               ;; conceivably be able to encode all the characters in
 910               ;; the region.
 911               (if (latin-unity-maybe-remap begin end curset csets psets t)
 912                   (throw 'done (list curset))))
 913
 914             ;; Can't encode using anything from the
 915             ;; `mm-coding-system-priorities' list.
 916             ;; Leave `mm-find-mime-charset' to do most of the work.
 917             nil))
 918
 919       ;; Right, latin unity isn't available; let `mm-find-charset-region'
 920       ;; take its default action, which equally applies to GNU Emacs.
 921       nil)))
 922
 923 (defmacro mm-xemacs-find-mime-charset (begin end)
 924   (when (featurep 'xemacs)
 925     `(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end))))
 926
 927 (defun mm-find-mime-charset-region (b e &optional hack-charsets)
 928   "Return the MIME charsets needed to encode the region between B and E.
 929 nil means ASCII, a single-element list represents an appropriate MIME
 930 charset, and a longer list means no appropriate charset."
 931   (let (charsets)
 932     ;; The return possibilities of this function are a mess...
 933     (or (and (mm-multibyte-p)
 934              mm-use-find-coding-systems-region
 935              ;; Find the mime-charset of the most preferred coding
 936              ;; system that has one.
 937              (let ((systems (find-coding-systems-region b e)))
 938                (when mm-coding-system-priorities
 939                  (setq systems
 940                        (sort systems 'mm-sort-coding-systems-predicate)))
 941                (setq systems (delq 'compound-text systems))
 942                (unless (equal systems '(undecided))
 943                  (while systems
 944                    (let* ((head (pop systems))
 945                           (cs (or (coding-system-get head :mime-charset)
 946                                   (coding-system-get head 'mime-charset))))
 947                      ;; The mime-charset (`x-ctext') of
 948                      ;; `compound-text' is not in the IANA list.  We
 949                      ;; shouldn't normally use anything here with a
 950                      ;; mime-charset having an `x-' prefix.
 951                      ;; Fixme:  Allow this to be overridden, since
 952                      ;; there is existing use of x-ctext.
 953                      ;; Also people apparently need the coding system
 954                      ;; `iso-2022-jp-3' (which Mule-UCS defines with
 955                      ;; mime-charset, though it's not valid).
 956                      (if (and cs
 957                               (not (string-match "^[Xx]-" (symbol-name cs)))
 958                               ;; UTF-16 of any variety is invalid for
 959                               ;; text parts and, unfortunately, has
 960                               ;; mime-charset defined both in Mule-UCS
 961                               ;; and versions of Emacs.  (The name
 962                               ;; might be `mule-utf-16...'  or
 963                               ;; `utf-16...'.)
 964                               (not (string-match "utf-16" (symbol-name cs))))
 965                          (setq systems nil
 966                                charsets (list cs))))))
 967                charsets))
 968         ;; If we're XEmacs, and some coding system is appropriate,
 969         ;; mm-xemacs-find-mime-charset will return an appropriate list.
 970         ;; Otherwise, we'll get nil, and the next setq will get invoked.
 971         (setq charsets (mm-xemacs-find-mime-charset b e))
 972
 973         ;; We're not multibyte, or a single coding system won't cover it.
 974         (setq charsets
 975               (mm-delete-duplicates
 976                (mapcar 'mm-mime-charset
 977                        (delq 'ascii
 978                              (mm-find-charset-region b e))))))
 979     (if (and (> (length charsets) 1)
 980              (memq 'iso-8859-15 charsets)
 981              (memq 'iso-8859-15 hack-charsets)
 982              (save-excursion (mm-iso-8859-x-to-15-region b e)))
 983         (mapcar (lambda (x) (setq charsets (delq (car x) charsets)))
 984                 mm-iso-8859-15-compatible))
 985     (if (and (memq 'iso-2022-jp-2 charsets)
 986              (memq 'iso-2022-jp-2 hack-charsets))
 987         (setq charsets (delq 'iso-2022-jp charsets)))
 988     ;; Attempt to reduce the number of charsets if utf-8 is available.
 989     (if (and (featurep 'xemacs)
 990              (> (length charsets) 1)
 991              (mm-coding-system-p 'utf-8))
 992         (let ((mm-coding-system-priorities
 993                (cons 'utf-8 mm-coding-system-priorities)))
 994           (setq charsets
 995                 (mm-delete-duplicates
 996                  (mapcar 'mm-mime-charset
 997                          (delq 'ascii
 998                                (mm-find-charset-region b e)))))))
 999     charsets))
1000
(defmacro mm-with-unibyte-buffer (&rest forms)
  "Evaluate FORMS like `progn' inside a fresh temporary buffer.
`default-enable-multibyte-characters' is bound to nil around the
whole `with-temp-buffer' form, so unibyte mode is used for it."
  (list 'let '(default-enable-multibyte-characters)
        (cons 'with-temp-buffer forms)))
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
1008
(defmacro mm-with-multibyte-buffer (&rest forms)
  "Evaluate FORMS like `progn' inside a fresh temporary buffer.
`default-enable-multibyte-characters' is bound to t around the whole
`with-temp-buffer' form, so multibyte mode is used for it."
  (list 'let '((default-enable-multibyte-characters t))
        (cons 'with-temp-buffer forms)))
(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
1016
(defmacro mm-with-unibyte-current-buffer (&rest forms)
  "Evaluate FORMS with current buffer temporarily made unibyte.
`default-enable-multibyte-characters' is bound to nil as well.  When
`mm-emacs-mule' is nil (e.g. in XEmacs) the buffer is left alone and
only that binding is made.

Afterwards the buffer that was current on entry is made current again
and its previous multibyte state is restored, even on a non-local
exit.

NOTE: Use this macro with caution in multibyte buffers (it is not
worth using this macro in unibyte buffers of course).  Use of
`(set-buffer-multibyte t)', which is run finally, is generally
harmful since it is likely to modify existing data in the buffer.
For instance, it converts \"\\300\\255\" into \"\\255\" in
Emacs 23 (unicode)."
  (let* ((saved-flag (make-symbol "multibyte"))
         (saved-buffer (make-symbol "buffer"))
         ;; Expansion used when the buffer must not be touched.
         (plain `(let (default-enable-multibyte-characters)
                   ,@forms))
         ;; Expansion that flips the buffer to unibyte and back.
         (guarded `(let ((,saved-flag enable-multibyte-characters)
                         (,saved-buffer (current-buffer)))
                     (unwind-protect
                         (let (default-enable-multibyte-characters)
                           (set-buffer-multibyte nil)
                           ,@forms)
                       (set-buffer ,saved-buffer)
                       (set-buffer-multibyte ,saved-flag)))))
    (list 'if 'mm-emacs-mule guarded plain)))
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
1043
(defmacro mm-with-unibyte (&rest forms)
  "Evaluate FORMS with `default-enable-multibyte-characters' bound to nil.
That variable holds the default value of `enable-multibyte-characters'."
  (cons 'let
        (cons '(default-enable-multibyte-characters)
              forms)))
(put 'mm-with-unibyte 'edebug-form-spec '(body))
(put 'mm-with-unibyte 'lisp-indent-function 0)
1050
(defmacro mm-with-multibyte (&rest forms)
  "Evaluate FORMS with `default-enable-multibyte-characters' bound to t.
That variable holds the default value of `enable-multibyte-characters'."
  (cons 'let
        (cons '((default-enable-multibyte-characters t))
              forms)))
(put 'mm-with-multibyte 'edebug-form-spec '(body))
(put 'mm-with-multibyte 'lisp-indent-function 0)
1057
(defun mm-find-charset-region (b e)
  "Return a list of Emacs charsets in the region B to E.
In a multibyte buffer with `find-charset-region' available, return its
result minus `composition', `eight-bit-control', `eight-bit-graphic'
and `control-1'.

Otherwise return (ascii) if the region holds only characters in the
range 0-127, else a two-element list of `ascii' and a guessed charset.
The guess comes from the language environment or, failing that, from
the `mail-parse-charset' entry in `mm-mime-mule-charset-alist', and
defaults to `latin-iso8859-1'."
  (cond
   ((and (mm-multibyte-p)
         (fboundp 'find-charset-region))
    ;; Remove composition since the base charsets have been included.
    ;; Remove eight-bit-*, treat them as ascii.
    (let ((css (find-charset-region b e)))
      (dolist (cs '(composition eight-bit-control eight-bit-graphic
                                control-1))
        (setq css (delq cs css)))
      css))
   (t
    ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
    (save-excursion
      (save-restriction
        (narrow-to-region b e)
        (goto-char (point-min))
        (skip-chars-forward "\0-\177")
        (if (eobp)
            ;; Nothing but ASCII in the region.
            '(ascii)
          (let (charset)
            ;; First choice: the charset of the language environment.
            (setq charset
                  (and (boundp 'current-language-environment)
                       (car (last (assq 'charset
                                        (assoc current-language-environment
                                               language-info-alist))))))
            (if (eq charset 'ascii) (setq charset nil))
            ;; Second choice: derive it from `mail-parse-charset'.
            (or charset
                (setq charset
                      (car (last (assq mail-parse-charset
                                       mm-mime-mule-charset-alist)))))
            (list 'ascii (or charset 'latin-iso8859-1)))))))))
1091
(if (not (fboundp 'shell-quote-argument))
    (defun mm-quote-arg (arg)
      "Return a version of ARG that is safe to evaluate in a shell.
Each shell-special character in ARG is preceded by a backslash.  ARG
itself is returned when it contains no such character."
      (let ((start 0) pieces hit)
        ;; *** bug: we don't handle newline characters properly
        (while (setq hit (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg start))
          ;; PIECES is built in reverse: text before the match, a
          ;; backslash, then the matched character itself.
          (setq pieces (cons (list (aref arg hit))
                             (cons "\\"
                                   (cons (substring arg start hit)
                                         pieces)))
                start (1+ hit)))
        (if (zerop start)
            arg
          (apply 'concat
                 (nreverse (cons (substring arg start) pieces))))))
  (defalias 'mm-quote-arg 'shell-quote-argument))
1106
(defun mm-auto-mode-alist ()
  "Return an `auto-mode-alist' with only the .gz (etc) thingies.
More precisely, keep the elements of `auto-mode-alist' whose cdr is a
list, in their original order."
  (let (kept)
    (dolist (entry auto-mode-alist)
      (if (listp (cdr entry))
          (setq kept (cons entry kept))))
    (nreverse kept)))
1116
;; `mm-insert-file-contents', `mm-append-to-file' and `mm-write-region'
;; prepend this list to `inhibit-file-name-handlers' when their INHIBIT
;; argument is non-nil.
(defvar mm-inhibit-file-name-handlers
  '(jka-compr-handler image-file-handler)
  "A list of handlers doing (un)compression (etc) thingies.")
1120
(defun mm-insert-file-contents (filename &optional visit beg end replace
                                         inhibit)
  "Like `insert-file-contents', but only reads in the file.
A buffer may be modified in several ways after reading into the buffer due
to advanced Emacs features, such as file-name-handlers, format decoding,
`find-file-hooks', etc.
This function ensures that none of these modifications will take place.
FILENAME, VISIT, BEG, END and REPLACE are passed on to
`insert-file-contents'.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((hook-symbol (if (boundp 'find-file-hook)
                         'find-file-hook
                       'find-file-hooks))
        ;; Switch off everything that could alter the inserted text.
        (format-alist nil)
        (after-insert-file-functions nil)
        (enable-local-variables nil)
        (enable-local-eval nil)
        (default-major-mode 'fundamental-mode)
        (auto-mode-alist (and (not inhibit) (mm-auto-mode-alist)))
        (inhibit-file-name-operation
         (or (and inhibit 'insert-file-contents)
             inhibit-file-name-operation))
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers)))
    ;; The hook variable is only known by name, so empty it by hand and
    ;; put the old value back afterwards.
    (let ((saved-hook (symbol-value hook-symbol)))
      (set hook-symbol nil)
      (unwind-protect
          (insert-file-contents filename visit beg end replace)
        (set hook-symbol saved-hook)))))
1151
(defun mm-append-to-file (start end filename &optional codesys inhibit)
  "Append the contents of the region to the end of file FILENAME.
START and END are buffer positions saying what text to write.
Optional CODESYS is the coding system used to encode the file; it
defaults to `mm-text-coding-system-for-write', then to
`mm-text-coding-system'.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         (cond (codesys)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-operation
         (or (and inhibit 'append-to-file)
             inhibit-file-name-operation))
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers)))
    ;; Write silently in append mode, then report it ourselves.
    (write-region start end filename t 'no-message)
    (message "Appended to %s" filename)))
1173
(defun mm-write-region (start end filename &optional append visit lockname
                              coding-system inhibit)
  "Like `write-region'.
START, END, FILENAME, APPEND, VISIT and LOCKNAME are passed on to it.
CODING-SYSTEM is the coding system used for writing; it defaults to
`mm-text-coding-system-for-write', then to `mm-text-coding-system'.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         (cond (coding-system)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-operation
         (or (and inhibit 'write-region)
             inhibit-file-name-operation))
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers)))
    (write-region start end filename append visit lockname)))
1191
1192 ;; It is not a MIME function, but some MIME functions use it.
;; Use the native `make-temp-file' only when it is byte-compiled and
;; the fourth element of its argument list is `suffix'; any error
;; raised while inspecting it counts as "not usable".
(if (and (fboundp 'make-temp-file)
         (ignore-errors
           (let ((def (symbol-function 'make-temp-file)))
             (and (byte-code-function-p def)
                  (setq def (if (fboundp 'compiled-function-arglist)
                                ;; XEmacs
                                (eval (list 'compiled-function-arglist def))
                              (aref def 0)))
                  (>= (length def) 4)
                  (eq (nth 3 def) 'suffix)))))
    (defalias 'mm-make-temp-file 'make-temp-file)
  ;; Stolen (and modified for Emacs 20 and XEmacs) from Emacs 22.
  (defun mm-make-temp-file (prefix &optional dir-flag suffix)
    "Create a temporary file.
The returned file name (created by appending some random characters at the end
of PREFIX, and expanding against `temporary-file-directory' if necessary),
is guaranteed to point to a newly created empty file.
You can then use `write-region' to write new data into the file.

If DIR-FLAG is non-nil, create a new empty directory instead of a file.

If SUFFIX is non-nil, add that at the end of the file name."
    (let ((umask (default-file-modes))
          file)
      (unwind-protect
          (progn
            ;; Create temp files with strict access rights.  It's easy to
            ;; loosen them later, whereas it's impossible to close the
            ;; time-window of loose permissions otherwise.
            ;; (448 is octal 700.)
            (set-default-file-modes 448)
            ;; The loop repeats as long as the `condition-case' form
            ;; yields non-nil, i.e. as long as creation failed because
            ;; the name was already taken.
            (while (condition-case err
                       (progn
                         (setq file
                               (make-temp-name
                                (expand-file-name
                                 prefix
                                 (if (fboundp 'temp-directory)
                                     ;; XEmacs
                                     (temp-directory)
                                   temporary-file-directory))))
                         (if suffix
                             (setq file (concat file suffix)))
                         (if dir-flag
                             (make-directory file)
                           ;; NOTE: This is unsafe if Emacs 20
                           ;; users and XEmacs users don't use
                           ;; a secure temp directory.
                           (gmm-write-region "" nil file nil 'silent
                                             nil 'excl))
                         ;; Success: nil ends the loop.
                         nil)
                     ;; Name collision: t makes the loop try again.
                     (file-already-exists t)
                     ;; The Emacs 20 and XEmacs versions of
                     ;; `make-directory' issue `file-error'.
                     ;; Retry only there and only if FILE exists;
                     ;; otherwise signal the error again.
                     (file-error (or (and (or (featurep 'xemacs)
                                              (= emacs-major-version 20))
                                          (file-exists-p file))
                                     (signal (car err) (cdr err)))))
              ;; the file was somehow created by someone else between
              ;; `make-temp-name' and `write-region', let's try again.
              nil)
            file)
        ;; Reset the umask.
        (set-default-file-modes umask)))))
1256
(defun mm-image-load-path (&optional package)
  "Return a copy of `load-path' extended with image directories.
For each non-nil element of `load-path', the directory
\"etc/images/PACKAGE\" located next to it (that is, under its parent
directory) is inserted before the element when that directory exists.
PACKAGE defaults to \"gnus/\"."
  (let (result)
    (dolist (path load-path)
      (when path
        (let ((dir (concat (file-name-directory
                            (directory-file-name path))
                           "etc/images/" (or package "gnus/"))))
          (if (file-directory-p dir)
              (setq result (cons dir result)))))
      ;; The original element is always kept, even when it is nil.
      (setq result (cons path result)))
    (nreverse result)))
1267
1268 ;; Fixme: This doesn't look useful where it's used.
(if (not (fboundp 'detect-coding-region))
    (defun mm-detect-coding-region (start end)
      "Return `ascii' or a guessed charset for the text from START to END.
The value is `ascii' when skipping characters in the range 0-127 from
START reaches END, and the result of `mm-guess-charset' otherwise.
Point is put back where it was."
      (let ((origin (point)))
        (goto-char start)
        (skip-chars-forward "\0-\177" end)
        (prog1
            (if (eq (point) end)
                'ascii
              (mm-guess-charset))
          (goto-char origin))))
  (defun mm-detect-coding-region (start end)
    "Like `detect-coding-region' except returning the best one."
    (let ((detected (detect-coding-region start end)))
      ;; A list result has the best candidate first.
      (or (and (consp detected) (car detected))
          detected))))
1283
(if (not (fboundp 'coding-system-get))
    (defun mm-detect-mime-charset-region (start end)
      "Detect MIME charset of the text in the region between START and END.
Without `coding-system-get' this is simply the result of
`mm-detect-coding-region'."
      (mm-detect-coding-region start end))
  (defun mm-detect-mime-charset-region (start end)
    "Detect MIME charset of the text in the region between START and END.
The value is the `:mime-charset' property, or failing that the
`mime-charset' property, of what `mm-detect-coding-region' returns."
    (let ((coding (mm-detect-coding-region start end)))
      (or (coding-system-get coding :mime-charset)
          (coding-system-get coding 'mime-charset)))))
1294
1295
1296 (provide 'mm-util)
1297
1298 ;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238
1299 ;;; mm-util.el ends here