1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
3 ;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc.
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
7 ;; Bruno Haible <bruno@clisp.org>.
8 ;; Keywords: i18n, files
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
29 ;; This package makes sure visiting PO files decodes them correctly,
30 ;; according to the Charset= header in the PO file. For more support
31 ;; for editing PO files, see po-mode.el.
(defconst po-content-type-charset-alist
  ;; Each entry is (CHARSET-NAME . CODING-SYSTEM).  CHARSET-NAME is
  ;; spelled in upper case; CODING-SYSTEM is a symbol.  Entries marked
  ;; "approximation" map a charset onto a different coding system.
  '(;; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)
    ("ISO-8859-1" . iso-8859-1)
    ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)
    ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)
    ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)
    ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)
    ("ISO_8859-5" . iso-8859-5)
    ("ISO-8859-7" . iso-8859-7)
    ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)
    ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)
    ("ISO_8859-9" . iso-8859-9)
    ("ISO-8859-15" . iso-8859-15)       ; requires Emacs 21
    ("ISO_8859-15" . iso-8859-15)       ; requires Emacs 21
    ("CP437" . cp437)                   ; requires Emacs 20
    ("CP775" . cp775)                   ; requires Emacs 20
    ("CP850" . cp850)                   ; requires Emacs 20
    ("CP852" . cp852)                   ; requires Emacs 20
    ("CP855" . cp855)                   ; requires Emacs 20
    ("CP857" . cp857)                   ; requires Emacs 20
    ("CP861" . cp861)                   ; requires Emacs 20
    ("CP862" . cp862)                   ; requires Emacs 20
    ("CP864" . cp864)                   ; requires Emacs 20
    ("CP865" . cp865)                   ; requires Emacs 20
    ("CP866" . cp866)                   ; requires Emacs 21
    ("CP869" . cp869)                   ; requires Emacs 20
    ("CP1250" . cp1250)                 ; requires Emacs 20
    ("CP1251" . cp1251)                 ; requires Emacs 20
    ("CP1252" . iso-8859-1)             ; approximation
    ("CP1253" . cp1253)                 ; requires Emacs 20
    ("CP1254" . iso-8859-9)             ; approximation
    ("CP1255" . iso-8859-8)             ; approximation
    ("CP1257" . cp1257)                 ; requires Emacs 20
    ("GB2312" . cn-gb-2312)  ; also named 'gb2312' in XEmacs 21 or Emacs 21
                             ; also named 'euc-cn' in Emacs 20 or Emacs 21
    ("SHIFT_JIS" . shift_jis)
    ("TIS-620" . tis-620)               ; requires Emacs 20 or Emacs 21
    ("VISCII" . viscii)                 ; requires Emacs 20 or Emacs 21
    ("UTF-8" . utf-8)        ; requires Mule-UCS in Emacs 20, or Emacs 21
    )
  "How to convert a GNU libc/libiconv canonical charset name as seen in
Content-Type into a Mule coding system.")
(defun po-find-charset (filename)
  "Return PO file charset value.
FILENAME is read literally, chunk by chunk, into the current buffer.
The value is the text following \"charset=\" on the Content-Type line
of the PO header, as a string, or nil if no such line is found.

Buffer positions are used as file offsets, so the current buffer is
expected to be empty on entry; it is modified by the call."
  (let ((charset-regexp
         "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        ;; Becomes non-nil once a read returns fewer than the 4096 bytes
        ;; asked for, i.e. the end of the file has been reached.
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      ;; Append the next chunk at the end of the buffer, but leave point
      ;; where it was so the search for "msgid" resumes from there.
      (save-excursion
        (goto-char (point-max))
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          ;; (nth 1 pair) is the number of bytes actually inserted.
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file is already in the buffer: there is nothing
          ;; left to load, so the charset is simply absent.
          (short-read nil)
          ;; We've found the first msgid; maybe, only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if charset still
          ;; isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
(defun po-find-file-coding-system-guts (operation filename)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

Return nil unless OPERATION is `insert-file-contents' and FILENAME
exists.  Otherwise the value is a one-element list holding the coding
system chosen for the charset announced in the PO header, falling back
to `no-conversion' when no usable coding system is found."
  (and (eq operation 'insert-file-contents)
       (file-exists-p filename)
       ;; `po-find-charset' inserts raw file bytes into the current
       ;; buffer, so the probing is done in a scratch buffer.
       (with-temp-buffer
         (let* ((coding-system-for-read 'no-conversion)
                ;; A header without a charset is treated as "ascii".
                (charset (or (po-find-charset filename) "ascii"))
                (charset-upper (upcase charset))
                (charset-lower (downcase charset))
                ;; First choice: the explicit table entry, if any.
                (candidate
                 (cdr (assoc charset-upper po-content-type-charset-alist)))
                ;; Second choice: an existing symbol named like the charset.
                (try (or candidate (intern-soft charset-lower))))
           (list (cond ((and try (coding-system-p try))
                        try)
                       ;; TRY names a cpNNN codepage that is not set up
                       ;; yet but is supported: set it up, then use it.
                       ((and try
                             (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                           (symbol-name try))
                             (assoc (substring (symbol-name try) 2)
                                    (cp-supported-codepages)))
                        (codepage-setup (substring (symbol-name try) 2))
                        try)
                       ;; Same, going by the charset name itself.
                       ((and (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                           charset-lower)
                             (assoc (substring charset-lower 2)
                                    (cp-supported-codepages)))
                        (codepage-setup (substring charset-lower 2))
                        (intern charset-lower))
                       (t
                        'no-conversion)))))))
(defun po-find-file-coding-system (arg-list)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

ARG-LIST is a list whose first element is the I/O operation and whose
second element is the file name; both are handed on to
`po-find-file-coding-system-guts'."
  (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
;; This is for XEmacs.
;(defun po-find-file-coding-system (operation filename)
;  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
;Called through file-coding-system-alist, before the file is visited for real."
;  (po-find-file-coding-system-guts operation filename))