*** empty log message ***
[emacs.git] / lisp / textmodes / po.el
blob6de22b54734405714a0970f376de2a2e4b3b973c
1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
3 ;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc.
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
7 ;; Bruno Haible <bruno@clisp.org>.
8 ;; Keywords: i18n, files
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
27 ;;; Commentary:
29 ;; This package makes sure visiting PO files decodes them correctly,
30 ;; according to the Charset= header in the PO file. For more support
31 ;; for editing PO files, see po-mode.el.
33 ;;; Code:
(defconst po-content-type-charset-alist
  '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
    ("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)
    ("ISO-8859-1" . iso-8859-1)
    ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)
    ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)
    ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)
    ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)
    ("ISO_8859-5" . iso-8859-5)
    ;("ISO-8859-6" . ??)
    ;("ISO_8859-6" . ??)
    ("ISO-8859-7" . iso-8859-7)
    ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)
    ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)
    ("ISO_8859-9" . iso-8859-9)
    ;("ISO-8859-13" . ??)
    ;("ISO_8859-13" . ??)
    ("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
    ("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
    ("KOI8-R" . koi8-r)
    ;("KOI8-U" . ??)
    ("CP437" . cp437) ; requires Emacs 20
    ("CP775" . cp775) ; requires Emacs 20
    ("CP850" . cp850) ; requires Emacs 20
    ("CP852" . cp852) ; requires Emacs 20
    ("CP855" . cp855) ; requires Emacs 20
    ;("CP856" . ??)
    ("CP857" . cp857) ; requires Emacs 20
    ("CP861" . cp861) ; requires Emacs 20
    ("CP862" . cp862) ; requires Emacs 20
    ("CP864" . cp864) ; requires Emacs 20
    ("CP865" . cp865) ; requires Emacs 20
    ("CP866" . cp866) ; requires Emacs 21
    ("CP869" . cp869) ; requires Emacs 20
    ;("CP874" . ??)
    ;("CP922" . ??)
    ;("CP932" . ??)
    ;("CP943" . ??)
    ;("CP949" . ??)
    ;("CP950" . ??)
    ;("CP1046" . ??)
    ;("CP1124" . ??)
    ;("CP1129" . ??)
    ("CP1250" . cp1250) ; requires Emacs 20
    ("CP1251" . cp1251) ; requires Emacs 20
    ("CP1252" . iso-8859-1) ; approximation
    ("CP1253" . cp1253) ; requires Emacs 20
    ("CP1254" . iso-8859-9) ; approximation
    ("CP1255" . iso-8859-8) ; approximation
    ;("CP1256" . ??)
    ("CP1257" . cp1257) ; requires Emacs 20
    ("GB2312" . cn-gb-2312)  ; also named 'gb2312' in XEmacs 21 or Emacs 21
                             ; also named 'euc-cn' in Emacs 20 or Emacs 21
    ("EUC-JP" . euc-jp)
    ("EUC-KR" . euc-kr)
    ;("EUC-TW" . ??)
    ("BIG5" . big5)
    ;("BIG5-HKSCS" . ??)
    ;("GBK" . ??)
    ;("GB18030" . ??)
    ("SHIFT_JIS" . shift_jis)
    ;("JOHAB" . ??)
    ("TIS-620" . tis-620) ; requires Emacs 20 or Emacs 21
    ("VISCII" . viscii) ; requires Emacs 20 or Emacs 21
    ("UTF-8" . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
    )
  "Alist of (CHARSET . CODING-SYSTEM) for PO file Content-Type charsets.
Each CHARSET is a string, a GNU libc/libiconv canonical charset name
as seen in the Content-Type header, written in upper case.  Each
CODING-SYSTEM is the symbol of the Mule coding system to use for it.
Charsets with no known coding system are left commented out.")
(defun po-find-charset (filename)
  "Return the charset value found in the header of PO file FILENAME.
The value is the text following \"charset=\" on the Content-Type line
of the header entry, as a string, or nil if no such line is found.

FILENAME is read literally, in chunks, into the current buffer, and
the search is done there.  File offsets are computed from the buffer
size, so the current buffer should hold nothing except what this
function has inserted; callers are expected to provide an empty
scratch buffer.

This function is not a command: it needs FILENAME and cannot be
invoked with \\[execute-extended-command]."
  (let ((charset-regexp
         "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment), try the next 4096 bytes repeatedly until we know for sure
    ;; that we have seen the whole of the empty header entry.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        ;; Buffer positions are 1-based and file offsets 0-based, hence
        ;; the `1-'.  The read appends the next chunk at the buffer end.
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          ;; Fewer bytes than requested means the end of file was reached.
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file is loaded and has no charset line.
          (short-read nil)
          ;; We've found the first msgid; maybe only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if the charset
          ;; still isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
(defun po-find-file-coding-system-guts (operation filename)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through `file-coding-system-alist', before the file is visited for real.

OPERATION is the I/O operation symbol and FILENAME the file it applies
to.  Unless OPERATION is `insert-file-contents' and FILENAME exists,
the value is nil.  Otherwise the value is a one-element list holding
the chosen coding system, that is, a pair whose ENCODING part is nil.

The charset announced by the file (see `po-find-charset', defaulting
to \"ascii\") is looked up in `po-content-type-charset-alist'; failing
that, a symbol with the charset's lower-case name is tried.  Names of
the form cpNNN that are not yet coding systems are set up through
`codepage-setup' when `cp-supported-codepages' lists them.  If nothing
matches, the coding system is `no-conversion'."
  (and (eq operation 'insert-file-contents)
       (file-exists-p filename)
       (with-temp-buffer
         ;; Bound so that reading the header does no decoding.
         (let* ((coding-system-for-read 'no-conversion)
                (charset (or (po-find-charset filename) "ascii"))
                (charset-upper (upcase charset))
                (charset-lower (downcase charset))
                (candidate
                 (cdr (assoc charset-upper po-content-type-charset-alist)))
                ;; `intern-soft' avoids creating a symbol for an unknown name.
                (try (or candidate (intern-soft charset-lower))))
           (list (cond ((and try (coding-system-p try))
                        try)
                       ;; TRY names a supported codepage that has not been
                       ;; set up as a coding system yet.
                       ((and try
                             (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                           (symbol-name try))
                             (assoc (substring (symbol-name try) 2)
                                    (cp-supported-codepages)))
                        (codepage-setup (substring (symbol-name try) 2))
                        try)
                       ;; No symbol exists yet, but the charset name itself
                       ;; designates a supported codepage.
                       ((and (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                           charset-lower)
                             (assoc (substring charset-lower 2)
                                    (cp-supported-codepages)))
                        (codepage-setup (substring charset-lower 2))
                        (intern charset-lower))
                       (t
                        'no-conversion)))))))
174 ;;;###autoload
(defun po-find-file-coding-system (arg-list)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through `file-coding-system-alist', before the file is visited for real.

ARG-LIST is a list whose first element is taken as the operation and
whose second element is taken as the file name; any further elements
are ignored.  Both are handed to `po-find-file-coding-system-guts'."
  (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
180 ;; This is for XEmacs.
181 ;(defun po-find-file-coding-system (operation filename)
182 ; "\
183 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
184 ;Called through file-coding-system-alist, before the file is visited for real."
185 ; (po-find-file-coding-system-guts operation filename))