(latexenc-find-file-coding-system): Don't inherit the EOL part of the
[emacs.git] / lisp / textmodes / po.el
blobc2e58d7d5823c3bdce6fac85c0b429cd47e5ffea
1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
3 ;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc.
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
7 ;; Bruno Haible <bruno@clisp.org>.
8 ;; Keywords: i18n, files
10 ;; This file is part of GNU Emacs.
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
27 ;;; Commentary:
29 ;; This package makes sure visiting PO files decodes them correctly,
30 ;; according to the Charset= header in the PO file. For more support
31 ;; for editing PO files, see po-mode.el.
33 ;;; Code:
(defconst po-content-type-charset-alist
  ;; Every name listed here maps to the same coding system, so build the
  ;; alist from the list of names.
  (mapcar (lambda (charset-name) (cons charset-name 'undecided))
          '("ASCII" "ANSI_X3.4-1968" "US-ASCII"))
  "Alist mapping GNU libc/libiconv canonical charset names to coding systems.
Lists only those canonical charset names that have no coding system
of the same name.")
(defun po-find-charset (filename)
  "Return the charset declared in the header of PO file FILENAME, or nil.
The value is the text following \"charset=\" in the Content-Type line
of the header entry.

FILENAME is read literally into the current buffer, chunk by chunk.
File offsets are derived from the buffer size, so the current buffer
is expected to start out empty."
  (let ((header-re
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (chunk-size 4096)
        (hit-eof nil))
    ;; Pull in the file one 4096-byte chunk at a time.  A PO file may
    ;; open with a long comment, so keep appending chunks until either
    ;; the first \"msgid\" shows up or the file is exhausted.
    (while (and (not hit-eof)
                (not (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        (let* ((offset (1- (point)))
               (result (insert-file-contents-literally
                        filename nil offset (+ offset chunk-size))))
          ;; Fewer bytes than requested means the end of file was reached.
          (setq hit-eof (< (cadr result) chunk-size)))))
    (cond
     ((re-search-forward header-re nil t)
      (match-string 1))
     ;; The whole file is loaded and holds no charset declaration.
     (hit-eof nil)
     ;; The first msgid was seen, but the msgstr value after it may have
     ;; been cut off at the chunk boundary.  Append 1024 more bytes and
     ;; look once more; if that fails too, give up.
     (t
      (save-excursion
        (goto-char (point-max))
        (let ((offset (1- (point))))
          (insert-file-contents-literally
           filename nil offset (+ offset 1024))))
      (when (re-search-forward header-re nil t)
        (match-string 1))))))
(defun po-find-file-coding-system-guts (operation filename)
  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset.

Return nil unless OPERATION is `insert-file-contents' and FILENAME
exists.  Otherwise the value is a one-element list holding the
decoding coding system, i.e. a pair whose ENCODING part is nil.
A file that declares no charset is treated as \"ascii\"; a charset
for which no coding system can be found yields `raw-text'."
  (and
   (eq operation 'insert-file-contents)
   (file-exists-p filename)
   ;; `po-find-charset' reads the file into the current buffer, so give
   ;; it a scratch buffer and make sure the bytes are not decoded.
   (with-temp-buffer
     (let* ((coding-system-for-read 'no-conversion)
            (charset (or (po-find-charset filename) "ascii"))
            entry)
       (list (cond
              ;; Canonical names that have no coding system of their own.
              ((setq entry
                     (assoc-string charset
                                   po-content-type-charset-alist
                                   t))
               (cdr entry))
              ;; A coding system of that name, compared ignoring case,
              ;; also trying the name with `_' replaced by `-'.
              ((or (setq entry (assoc-string charset coding-system-alist t))
                   (setq entry
                         (assoc-string (subst-char-in-string ?_ ?-
                                                             charset)
                                       coding-system-alist t)))
               (intern (car entry)))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((featurep 'code-pages)
               ;; Give up.
               'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               (require 'code-pages nil t)
               (if (or
                    (setq entry (assoc-string charset coding-system-alist t))
                    (setq entry (assoc-string (subst-char-in-string
                                               ?_ ?- charset)
                                              coding-system-alist t)))
                   (intern (car entry))
                 'raw-text))))))))
;;;###autoload
(defun po-find-file-coding-system (arg-list)
  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
ARG-LIST starts with the I/O operation, followed by the file name.
Called through `file-coding-system-alist', before the file is visited for real."
  (let ((operation (car arg-list))
        (filename (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation filename)))
117 ;; This is for XEmacs.
118 ;(defun po-find-file-coding-system (operation filename)
119 ; "\
120 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
121 ;Called through file-coding-system-alist, before the file is visited for real."
122 ; (po-find-file-coding-system-guts operation filename))
124 (provide 'po)
126 ;;; arch-tag: 56748a57-d64c-4200-8f6b-c3a70496eb8c
127 ;;; po.el ends here