;;; latexenc.el --- guess correct coding system in LaTeX files -*-coding: utf-8 -*-
;; Copyright (C) 2005, 2006 Free Software Foundation, Inc.
;; Author: Arne Jørgensen <arne@arnested.dk>
6 ;; Keywords: mule, coding system, latex
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
27 ;; This code tries to guess the correct coding system of a LaTeX file.
29 ;; First it searches for a \inputencoding{...} or
30 ;; \usepackage[...]{inputenc} line in the file and looks up the ... in
;; `latex-inputenc-coding-alist' to find the corresponding coding
;; system.
34 ;; If this fails it will search for AUCTeX's TeX-master or tex-mode's
35 ;; tex-main-file variable in the local variables section and visit
36 ;; that file to get the coding system from the master file. This check
;; can be disabled by setting `latexenc-dont-use-TeX-master-flag' to
;; t.
40 ;; If we have still not found a coding system we will try to use the
41 ;; standard tex-mode's `tex-guess-main-file' and get the coding system
42 ;; from the main file. This check can be disabled by setting
43 ;; `latexenc-dont-use-tex-guess-main-file-flag' to t.
45 ;; The functionality is enabled by adding the function
;; `latexenc-find-file-coding-system' to `file-coding-system-alist',
;; i.e.:
49 ;; (add-to-list 'file-coding-system-alist
50 ;; '("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system))
(defcustom latex-inputenc-coding-alist
  '(("ansinew" . windows-1252) ; MS Windows ANSI encoding, extension of Latin-1
    ("applemac" . mac-roman)
    ("ascii" . us-ascii)
    ("cp1250" . windows-1250) ; MS Windows encoding, codepage 1250
    ("cp1252" . windows-1252) ; synonym of ansinew
    ("cp437de" . cp437) ; IBM code page 437 (German version): 225 is \ss
    ("cp437" . cp437) ; IBM code page 437: 225 is \beta
    ("cp850" . cp850) ; IBM code page 850
    ("cp852" . cp852) ; IBM code page 852
    ("cp858" . cp858) ; IBM code page 850 but with a euro symbol
    ("cp865" . cp865) ; IBM code page 865
    ;; The DECMultinational character set used by the OpenVMS system
    ;; ("decmulti" . undecided)
    ("latin1" . iso-8859-1)
    ("latin2" . iso-8859-2)
    ("latin3" . iso-8859-3)
    ("latin4" . iso-8859-4)
    ;; inputenc's "latin5" is ISO Latin-5, i.e. ISO 8859-9 (Turkish);
    ;; iso-8859-5 is the unrelated Latin/Cyrillic set.
    ("latin5" . iso-8859-9)
    ("latin9" . iso-8859-15)
    ;; ("latin10" . undecided)
    ;; ("macce" . undecided) ; Apple Central European
    ("next" . next) ; The Next encoding
    ;; Keep "utf8" ahead of "utf8x": the reverse lookup in
    ;; `latexenc-coding-system-to-inputenc' returns the first match,
    ;; and plain "utf8" needs no extra LaTeX package.
    ("utf8" . utf-8)
    ("utf8x" . utf-8)) ; used by the Unicode LaTeX package
  "Mapping from LaTeX encodings in \"inputenc.sty\" to Emacs coding systems.
LaTeX encodings are specified with \"\\usepackage[encoding]{inputenc}\".
Each element has the form (ENCODING . CODING-SYSTEM), where ENCODING
is the LaTeX input encoding name as a string and CODING-SYSTEM is the
symbol naming the Emacs coding system.
Used by the function `latexenc-find-file-coding-system'."
  :group 'files
  :group 'mule
  :type '(alist :key-type (string :tag "LaTeX input encoding")
                :value-type (coding-system :tag "Coding system")))
(defun latexenc-inputenc-to-coding-system (inputenc)
  "Return the Emacs coding system registered for LaTeX encoding INPUTENC.
INPUTENC is looked up as a key in `latex-inputenc-coding-alist'.
Return nil if the alist has no entry for it."
  (let ((entry (assoc inputenc latex-inputenc-coding-alist)))
    ;; `cdr' of nil is nil, so a failed lookup falls through to nil.
    (cdr entry)))
(defun latexenc-coding-system-to-inputenc (cs)
  "Return the corresponding input encoding for the specified coding system.
CS is an Emacs coding system.  The entries of
`latex-inputenc-coding-alist' are scanned in order and the LaTeX
encoding name (a string) of the first entry whose coding system has
the same base as CS is returned.
Return nil if no matching input encoding can be found."
  ;; The `catch' gives the `throw' below somewhere to land, so the scan
  ;; stops at the first matching entry.
  (catch 'result
    (dolist (elem latex-inputenc-coding-alist)
      (let ((elem-cs (cdr elem)))
        (when (and
               ;; `coding-system-p' also returns t for nil, so reject
               ;; entries with an empty cdr explicitly.
               elem-cs
               (coding-system-p elem-cs)
               ;; Compare base coding systems so that aliases and EOL
               ;; variants of the same coding system still match.
               (eq (coding-system-base cs) (coding-system-base elem-cs)))
          (throw 'result (car elem)))))
    ;; Reached only when no entry matched.
    nil))
(defvar latexenc-dont-use-TeX-master-flag nil
  "Non-nil means don't follow TeX-master to find the coding system.")

(defvar latexenc-dont-use-tex-guess-main-file-flag nil
  "Non-nil means don't use `tex-guess-main-file' to find the coding system.")
(defun latexenc-find-file-coding-system (arg-list)
  "Determine the coding system of a LaTeX file if it uses \"inputenc.sty\".
The mapping from LaTeX's \"inputenc.sty\" encoding names to Emacs
coding system names is determined from `latex-inputenc-coding-alist'.

ARG-LIST is a list whose car is the symbol naming the I/O operation
and whose second element, when it is a string, is the file name.
Only the `insert-file-contents' operation is handled; the text to
inspect is taken from the current buffer.

Return a coding system symbol, or `undecided' when the operation is
not `insert-file-contents' or no encoding could be determined."
  (if (eq (car arg-list) 'insert-file-contents)
      (save-excursion
        ;; try to find the coding system in this file
        (goto-char (point-min))
        (if (catch 'cs
              ;; Match \usepackage and \inputencoding case-sensitively.
              (let ((case-fold-search nil))
                (while (search-forward "inputenc" nil t)
                  (goto-char (match-beginning 0))
                  ;; The regexps start with "[^%\n]*", so they must be
                  ;; tried from the beginning of the line; that prefix
                  ;; is also what rejects commented-out declarations.
                  (beginning-of-line)
                  (if (or (looking-at "[^%\n]*\\\\usepackage\\[\\([^]]*\\)\\]{\\([^}]*,\\)?inputenc\\(,[^}]*\\)?}")
                          (looking-at "[^%\n]*\\\\inputencoding{\\([^}]*\\)}"))
                      ;; Leave the loop with the match data intact.
                      (throw 'cs t)
                    ;; No declaration on this line: resume the search
                    ;; after the text the last regexp matcher consumed.
                    (goto-char (match-end 0))))))
            (let* ((match (match-string 1))
                   ;; Prefer the alist; otherwise try the LaTeX name
                   ;; itself as a coding system name.
                   (sym (or (latexenc-inputenc-to-coding-system match)
                            (intern match))))
              (cond
               ((coding-system-p sym) sym)
               ;; Loading `code-pages' (if available) may define SYM.
               ((and (require 'code-pages nil t) (coding-system-p sym)) sym)
               (t 'undecided)))
          ;; else try to find it in the master/main file

          ;; Fixme: If the current file is in an archive (e.g. tar,
          ;; zip), we should find the master file in that archive.
          ;; But, that is not yet implemented.  -- K.Handa
          (let ((default-directory (if (stringp (nth 1 arg-list))
                                       (file-name-directory (nth 1 arg-list))
                                     default-directory))
                latexenc-main-file)
            ;; Is there a TeX-master or tex-main-file in the local variables
            ;; section?
            (unless latexenc-dont-use-TeX-master-flag
              (goto-char (point-max))
              ;; Only look in the last 3000 characters; `move' makes a
              ;; failed search leave point at the bound instead of
              ;; signaling an error.
              (search-backward "\n\^L" (max (- (point-max) 3000) (point-min))
                               'move)
              (search-forward "Local Variables:" nil t)
              (when (re-search-forward
                     "^%+ *\\(TeX-master\\|tex-main-file\\): *\"\\(.+\\)\""
                     nil t)
                (let ((file (match-string 2)))
                  ;; Try the name as given, then with each candidate
                  ;; extension appended.
                  (dolist (ext `("" ,(if (boundp 'TeX-default-extension)
                                         (concat "." TeX-default-extension)
                                       "")
                                 ".tex" ".ltx" ".dtx" ".drv"))
                    (if (and (null latexenc-main-file) ;Stop at first.
                             (file-exists-p (concat file ext)))
                        (setq latexenc-main-file (concat file ext)))))))
            ;; try tex-modes tex-guess-main-file
            (when (and (not latexenc-dont-use-tex-guess-main-file-flag)
                       (not latexenc-main-file))
              ;; Use a separate `when' so the byte-compiler sees the fboundp.
              (when (fboundp 'tex-guess-main-file)
                (let ((tex-start-of-header "\\\\document\\(style\\|class\\)"))
                  (setq latexenc-main-file (tex-guess-main-file)))))
            ;; if we found a master/main file get the coding system from it
            (if (and latexenc-main-file
                     (file-regular-p latexenc-main-file)
                     (file-readable-p latexenc-main-file))
                ;; Bind both flags to t while the main file is visited,
                ;; so detecting its coding system cannot recurse into
                ;; yet another master/main file lookup.
                (let* ((latexenc-dont-use-tex-guess-main-file-flag t)
                       (latexenc-dont-use-TeX-master-flag t)
                       (latexenc-main-buffer
                        (find-file-noselect latexenc-main-file t)))
                  (coding-system-base   ;Disregard the EOL part of the CS.
                   (with-current-buffer latexenc-main-buffer
                     (or coding-system-for-write buffer-file-coding-system))))
              'undecided))))
    'undecided))
191 ;; arch-tag: f971bc3e-1fec-4609-8f2f-73dd41ab22e1
192 ;;; latexenc.el ends here