1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
6 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
7 ;; Greg McGary <gkm@magilla.cichlid.com>,
8 ;; Bruno Haible <bruno@clisp.org>.
9 ;; Keywords: i18n, files
11 ;; This file is part of GNU Emacs.
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 ;; This package makes sure visiting PO files decodes them correctly,
29 ;; according to the Charset= header in the PO file. For more support
30 ;; for editing PO files, see po-mode.el.
;; Each entry is (CHARSET-NAME . CODING-SYSTEM).  All three entries are
;; spellings of plain ASCII and map to `undecided'.
(defconst po-content-type-charset-alist
  '(("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided))
  "Alist of coding system versus GNU libc/libiconv canonical charset name.
Contains canonical charset names that don't correspond to coding systems.")
(defun po-find-charset (filename)
  "Return PO charset value for FILENAME.
If FILENAME is a cons cell, its CDR is a buffer that already contains
the PO file (but not yet decoded).

The value is the string following \"charset=\" in the Content-Type
header line, or nil if no such line is found.

When FILENAME is a file name, its bytes are inserted literally, piece
by piece, at the end of the current buffer.  When FILENAME is a cons
cell, its buffer is made current and point is moved in it."
  (let ((charset-regexp
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (buf (and (consp filename) (cdr filename)))
        (short-read nil))
    (when buf
      (set-buffer buf)
      (goto-char (point-min)))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    ;; With a buffer argument nothing is read: the loop exits immediately.
    (while (not (or short-read (re-search-forward "^msgid" nil t) buf))
      (save-excursion
        (goto-char (point-max))
        ;; The current end of buffer doubles as the file offset at which
        ;; the next chunk starts.
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          ;; Fewer bytes than requested means end of file was reached.
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; Whole file (or whole buffer) examined: no charset declared.
          ((or short-read buf) nil)
          ;; We've found the first msgid; maybe, only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if charset still
          ;; isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
(defun po--lookup-coding-system (charset)
  "Return the coding system whose name matches CHARSET, or nil.
CHARSET is looked up case-insensitively in `coding-system-alist',
first as given and then with every `_' replaced by `-'."
  (let ((entry (or (assoc-string charset coding-system-alist t)
                   (assoc-string (subst-char-in-string ?_ ?- charset)
                                 coding-system-alist t))))
    (and entry (intern (car entry)))))

(defun po-find-file-coding-system-guts (operation filename)
  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset.
FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
in BUFFER.

Return nil unless OPERATION is `insert-file-contents' and the file
exists (or, for a cons FILENAME, its buffer is live).  Otherwise the
value is a one-element list holding the decoding coding system;
`raw-text' is used when the charset matches no known coding system."
  (and
   (eq operation 'insert-file-contents)
   ;; Only test the file system when FILENAME really is a file name;
   ;; a cons with a dead buffer must yield nil, not an error.
   (if (consp filename)
       (buffer-live-p (cdr filename))
     (file-exists-p filename))
   (with-temp-buffer
     ;; Bind `coding-system-for-read' so the header is read undecoded.
     (let* ((coding-system-for-read 'no-conversion)
            (charset (or (po-find-charset filename) "ascii"))
            (override (assoc-string charset
                                    po-content-type-charset-alist
                                    t)))
       (list (cond
              ;; Charset names that are not coding-system names.
              (override (cdr override))
              ((po--lookup-coding-system charset))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((featurep 'code-pages)
               ;; Give up.
               'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               (require 'code-pages nil t)
               (or (po--lookup-coding-system charset)
                   'raw-text))))))))
(defun po-find-file-coding-system (arg-list)
  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
Called through `file-coding-system-alist', before the file is visited for real.
The first element of ARG-LIST is the operation and the second is the
file name; both are handed to `po-find-file-coding-system-guts'."
  (let ((operation (nth 0 arg-list))
        (target (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation target)))
;; This is for XEmacs.
;(defun po-find-file-coding-system (operation filename)
;  "\
;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
;Called through file-coding-system-alist, before the file is visited for real."
;  (po-find-file-coding-system-guts operation filename))
134 ;; arch-tag: 56748a57-d64c-4200-8f6b-c3a70496eb8c