Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / lisp / textmodes / po.el
blob88ef4dafb76d420cb3cf6f0b5c7b04b25dd96eb5
1 ;;; po.el --- basic support of PO translation files -*- coding: utf-8; -*-
3 ;; Copyright (C) 1995-1998, 2000-2014 Free Software Foundation, Inc.
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
7 ;; Bruno Haible <bruno@clisp.org>.
8 ;; Keywords: i18n, files
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;;; Commentary:
27 ;; This package makes sure visiting PO files decodes them correctly,
28 ;; according to the Charset= header in the PO file. For more support
29 ;; for editing PO files, see po-mode.el.
31 ;;; Code:
(defconst po-content-type-charset-alist
  ;; Every name listed here is a spelling of plain ASCII; each one is
  ;; paired with the coding system `undecided'.
  (mapcar (lambda (charset-name) (cons charset-name 'undecided))
          '("ASCII" "ANSI_X3.4-1968" "US-ASCII"))
  "Alist mapping GNU libc/libiconv canonical charset names to coding systems.
Each element has the form (CHARSET-NAME . CODING-SYSTEM).  Only charset
names that do not themselves name a coding system are listed here.")
(defun po-find-charset (filename)
  "Return the charset declared in the header of the PO file FILENAME.
The value is the text following \"charset=\" on the header's
Content-Type line, or nil if no such line is found.

FILENAME is either a file name, in which case the file is read
literally into the current buffer in chunks, or a cons cell whose CDR
is a buffer already holding the raw (not yet decoded) PO file.  In the
latter case that buffer is made current and searched from its start."
  (let ((header-re
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (source-buffer (when (consp filename) (cdr filename)))
        (eof-reached nil))
    (if source-buffer
        (progn
          (set-buffer source-buffer)
          (goto-char (point-min))))
    ;; Pull the file in 4096 bytes at a time until the first "msgid"
    ;; line shows up or the file runs out.  A long leading comment may
    ;; push the header entry past the first chunk, hence the loop.
    ;; With a ready-made buffer nothing is read: the "msgid" search
    ;; still runs once (moving point past it on success) and then the
    ;; loop ends.
    (while (and (not eof-reached)
                (not (re-search-forward "^msgid" nil t))
                (not source-buffer))
      (let ((inserted
             (save-excursion
               (goto-char (point-max))
               ;; File offsets are 0-based, buffer positions 1-based.
               (insert-file-contents-literally
                filename nil (1- (point)) (+ (point) 4095)))))
        (setq eof-reached (< (cadr inserted) 4096))))
    (if (re-search-forward header-re nil t)
        (match-string 1)
      (unless (or eof-reached source-buffer)
        ;; The first msgid was seen, but the header's msgstr may have
        ;; been cut off at the chunk boundary.  Fetch 1024 more bytes
        ;; and look one last time before giving up.
        (save-excursion
          (goto-char (point-max))
          (insert-file-contents-literally
           filename nil (1- (point)) (+ (point) 1023)))
        (when (re-search-forward header-re nil t)
          (match-string 1))))))
(defun po-find-file-coding-system-guts (operation filename)
  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset.
FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
in BUFFER.

Return nil unless OPERATION is `insert-file-contents' and FILENAME
names an existing file (or, for the cons form, BUFFER is live).
Otherwise the value is the one-element list (DECODING), so the
ENCODING part of the pair is nil.  When the file declares no charset,
\"ascii\" is assumed; when no coding system matches the declared
charset, DECODING is `raw-text'."
  (and
   (eq operation 'insert-file-contents)
   (or (if (consp filename) (buffer-live-p (cdr filename)))
       (file-exists-p filename))
   ;; `po-find-charset' reads the raw file into the current buffer, so
   ;; give it a scratch buffer and make sure nothing gets decoded.
   (with-temp-buffer
     (let* ((coding-system-for-read 'no-conversion)
            (charset (or (po-find-charset filename) "ascii"))
            assoc)
       (list (cond
              ;; Charset names that are not coding system names.
              ((setq assoc
                     (assoc-string charset
                                   po-content-type-charset-alist
                                   t))
               (cdr assoc))
              ;; A coding system of that name, either as written or
              ;; with underscores replaced by hyphens.
              ((or (setq assoc (assoc-string charset coding-system-alist t))
                   (setq assoc
                         (assoc-string (subst-char-in-string ?_ ?-
                                                             charset)
                                       coding-system-alist t)))
               (intern (car assoc)))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((featurep 'code-pages)
               ;; Give up.
               'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               ;; The NOERROR argument keeps a missing `code-pages'
               ;; library from signaling; the lookup below then simply
               ;; fails again and we fall back to `raw-text'.
               (require 'code-pages nil t)
               (if (or
                    (setq assoc (assoc-string charset coding-system-alist t))
                    (setq assoc (assoc-string (subst-char-in-string
                                               ?_ ?- charset)
                                              coding-system-alist t)))
                   (intern (car assoc))
                 'raw-text))))))))
119 ;;;###autoload
(defun po-find-file-coding-system (arg-list)
  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
ARG-LIST is the list (OPERATION FILENAME ...) describing the I/O
primitive being performed; only its first two elements are used.
Called through `file-coding-system-alist', before the file is visited
for real."
  (let ((operation (car arg-list))
        (filename (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation filename)))
124 ;; This is for XEmacs.
125 ;(defun po-find-file-coding-system (operation filename)
126 ; "\
127 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
128 ;Called through file-coding-system-alist, before the file is visited for real."
129 ; (po-find-file-coding-system-guts operation filename))
131 (provide 'po)
133 ;;; po.el ends here