muse-latex: Handle special characters in a better way.
[muse-el.git] / experimental / muse-xml.el
blob3b527d9c400312ddaa1427eaaff0b013247d185e
1 ;;; muse-xml.el --- Publish XML files.
3 ;; Copyright (C) 2005 Free Software Foundation, Inc.
5 ;; This file is free software; you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation; either version 2, or (at your option)
8 ;; any later version.
10 ;; This file is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with GNU Emacs; see the file COPYING. If not, write to the
17 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 ;; Boston, MA 02110-1301, USA.
20 ;;; Commentary:
22 ;; A Compact RelaxNG schema is available in `examples/muse.rnc'. The
23 ;; current maintainer has no idea how to make use of it, except that
24 ;; it might come in handy with nxml-mode, xml.el, xml-parse.el, or
25 ;; XSLT.
27 ;; This file is currently in experimental state. This means that the
28 ;; published output is subject to change. This also means that you
29 ;; still have the opportunity to correct braindeaded publishing
30 ;; choices by sending email to the list :^) .
32 ;;; Contributors:
34 ;; Peter K. Lee (saint AT corenova DOT com) made the initial
35 ;; implementation of planner-publish.el, which was heavily borrowed
36 ;; from.
38 ;; Brad Collins (brad AT chenla DOT org) provided a Compact RelaxNG
39 ;; schema.
41 ;;; Code:
43 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;; Muse XML Publishing
47 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 (require 'muse-publish)
50 (require 'muse-regexps)
;; Customization group that collects the user options defined in this
;; file; it is nested under the `muse-publish' group.
(defgroup muse-xml nil
  "Options controlling the behavior of Muse XML publishing.
See `muse-xml' for more information."
  :group 'muse-publish)
;; Registered as the :suffix of the "xml" publishing style.
(defcustom muse-xml-extension ".xml"
  "Default file extension for publishing XML files."
  :type 'string
  :group 'muse-xml)
;; The header opens the <MUSE> root element and fills a <pageinfo>
;; block from embedded <lisp> forms; the encoding attribute of the XML
;; declaration comes from `muse-xml-encoding'.
(defcustom muse-xml-header
  "<?xml version=\"1.0\" encoding=\"<lisp>
(muse-xml-encoding)</lisp>\"?>
<MUSE>
<pageinfo>
<title><lisp>(muse-publishing-directive \"title\")</lisp></title>
<author><lisp>(muse-publishing-directive \"author\")</lisp></author>
<maintainer><lisp>(muse-style-element :maintainer)</lisp></maintainer>
<pubdate><lisp>(muse-publishing-directive \"date\")</lisp></pubdate>
</pageinfo>
<!-- Page published by Emacs Muse begins here -->\n"
  "Header used for publishing XML files.
This may be text or a filename."
  :type 'string
  :group 'muse-xml)
;; The footer must close the <MUSE> root element that `muse-xml-header'
;; opens; otherwise the published document is not well-formed XML.
(defcustom muse-xml-footer "
<!-- Page published by Emacs Muse ends here -->
</MUSE>\n"
  "Footer used for publishing XML files.
This may be text or a filename."
  :type 'string
  :group 'muse-xml)
;; Each rule below is a list of four elements, matching the :type
;; declared at the end of the form: an integer, a regexp, an integer,
;; and a replacement (a string or a function).  The first three rules
;; replace their match with the empty string, merging adjacent
;; <table>/<list> fragments; the last one calls
;; `muse-xml-markup-paragraph'.
(defcustom muse-xml-markup-regexps
  `(;; Join together the parts of a table
    (10000 ,(concat " </t\\(body\\|head\\|foot\\)>\\s-*</table>\\s-*"
                    "<table[^>]*>\\s-*<t\\1>\n") 0 "")
    (10100 "</table>\\s-*<table[^>]*>\n" 0 "")

    ;; Join together the parts of a list
    (10200 "</list>\\s-*<list[^>]*>\\s-*" 0 "")

    ;; Beginning of doc, end of doc, or plain paragraph separator
    (10300 ,(concat "\\(\n</\\(blockquote\\|format\\)>\\)?"
                    "\\(\\(\n\\(["
                    muse-regexp-blank
                    "]*\n\\)+\\)\\|\\`\\s-*\\|\\s-*\\'\\)"
                    "\\(<\\(blockquote\\|format type=\"center\"\\)>\n\\)?")
           0 muse-xml-markup-paragraph))
  "List of markup rules for publishing a Muse page to XML.
For more on the structure of this list, see `muse-publish-markup-regexps'."
  :type '(repeat (choice
                  (list :tag "Markup rule"
                        integer
                        (choice regexp symbol)
                        integer
                        (choice string function symbol))
                  function))
  :group 'muse-xml)
;; Anchors and tables are handled by dedicated functions defined in
;; this file rather than by plain format strings.
(defcustom muse-xml-markup-functions
  '((anchor . muse-xml-markup-anchor)
    (table . muse-xml-markup-table))
  "An alist of style types to custom functions for that kind of text.
For more on the structure of this list, see
`muse-publish-markup-functions'."
  :type '(alist :key-type symbol :value-type function)
  :group 'muse-xml)
;; Every string here ends up verbatim in the published XML, so each one
;; must be well-formed on its own: empty elements are self-closed
;; (\"<hr />\") and the em dash is written as a numeric character
;; reference, because \"&mdash;\" is not one of the entities predefined
;; by XML and the published file declares no DTD.
(defcustom muse-xml-markup-strings
  '((image-with-desc . "<image href=\"%s\">%s</image>")
    (image-link      . "<image href=\"%s\"></image>")
    (url-with-image  . "<link type=\"image\" href=\"%s\">%s</link>")
    (url-link        . "<link type=\"url\" href=\"%s\">%s</link>")
    (email-addr      . "<link type=\"email\" href=\"%s\">%s</link>")
    (emdash          . " &#8212; ")
    (rule            . "<hr />")
    (fn-sep          . "<hr />\n")
    (enddots         . "....")
    (dots            . "...")
    (section         . "<section level=\"1\"><title>")
    (section-end     . "</title>")
    (subsection      . "<section level=\"2\"><title>")
    (subsection-end  . "</title>")
    (subsubsection   . "<section level=\"3\"><title>")
    (subsubsection-end . "</title>")
    (section-other   . "<section level=\"%s\"><title>")
    (section-other-end . "</title>")
    (section-close   . "</section>")
    (footnote        . "<footnote>")
    (footnote-end    . "</footnote>")
    (begin-underline . "<format type=\"underline\">")
    (end-underline   . "</format>")
    (begin-literal   . "<code>")
    (end-literal     . "</code>")
    (begin-emph      . "<format type=\"emphasis\" level=\"1\">")
    (end-emph        . "</format>")
    (begin-more-emph . "<format type=\"emphasis\" level=\"2\">")
    (end-more-emph   . "</format>")
    (begin-most-emph . "<format type=\"emphasis\" level=\"3\">")
    (end-most-emph   . "</format>")
    (begin-verse     . "<verse>\n")
    (begin-verse-line . "<line>")
    (end-verse-line  . "</line>")
    (empty-verse-line . "<line />")
    (begin-last-stanza-line . "<line>")
    (end-last-stanza-line . "</line>")
    (end-verse       . "</verse>")
    (begin-example   . "<example>")
    (end-example     . "</example>")
    (begin-center    . "<format type=\"center\">\n")
    (end-center      . "\n</format>")
    (begin-quote     . "<blockquote>\n")
    (end-quote       . "\n</blockquote>")
    (begin-uli       . "<list type=\"unordered\">\n<item>")
    (end-uli         . "</item>\n</list>")
    (begin-oli       . "<list type=\"ordered\">\n<item>")
    (end-oli         . "</item>\n</list>")
    (begin-ddt       . "<list type=\"definition\">\n<item><term>")
    (start-dde       . "</term>\n<definition>")
    (end-ddt         . "</definition>\n</item>\n</list>"))
  "Strings used for marking up text.
These cover the most basic kinds of markup, the handling of which
differs little between the various styles."
  :type '(alist :key-type symbol :value-type string)
  :group 'muse-xml)
;; Maps the four characters with markup meaning in XML text and
;; attribute values to their entity references.
(defcustom muse-xml-markup-specials
  '((?\" . "&quot;")
    (?\< . "&lt;")
    (?\> . "&gt;")
    (?\& . "&amp;"))
  "A table of characters which must be represented specially."
  :type '(alist :key-type character :value-type string)
  :group 'muse-xml)
;; Used by `muse-xml-encoding' when the buffer has no coding system,
;; and by `muse-xml-finalize-buffer' to replace `no-conversion' or
;; `undecided-unix'.
(defcustom muse-xml-encoding-default 'utf-8
  "The default Emacs buffer encoding to use in published files.
This will be used if no special characters are found."
  :type 'symbol
  :group 'muse-xml)
;; Fallback returned by `muse-xml-transform-content-type' when the
;; coding system has no entry in `muse-xml-encoding-map'.
(defcustom muse-xml-charset-default "utf-8"
  "The default XML charset to use if no translation is
found in `muse-xml-encoding-map'."
  :type 'string
  :group 'muse-xml)
;; Keys are looked up with `assoc' after the coding system has been
;; passed through `coding-system-base' (see
;; `muse-xml-transform-content-type').
(defcustom muse-xml-encoding-map
  '((iso-8859-1 . "iso-8859-1")
    (iso-2022-jp . "iso-2022-jp")
    (utf-8 . "utf-8")
    (japanese-iso-8bit . "euc-jp")
    (chinese-big5 . "big5")
    (mule-utf-8 . "utf-8")
    (chinese-iso-8bit . "gb2312")
    (chinese-gbk . "gbk"))
  "An alist mapping Emacs coding systems to appropriate XML charsets.
Use the base name of the coding system (i.e. without the -unix)."
  :type '(alist :key-type coding-system :value-type string)
  :group 'muse-xml)
(defun muse-xml-transform-content-type (content-type)
  "Return the XML charset name for the Emacs coding system CONTENT-TYPE.
The base of CONTENT-TYPE is looked up in `muse-xml-encoding-map'.
When there is no entry for it, or when `coding-system-base' is not
available, return `muse-xml-charset-default' instead."
  (let ((entry (when (fboundp 'coding-system-base)
                 (assoc (coding-system-base content-type)
                        muse-xml-encoding-map))))
    (cond (entry (cdr entry))
          (t muse-xml-charset-default))))
(defun muse-xml-encoding ()
  "Return the XML charset name to declare for the current buffer.
The buffer's `buffer-file-coding-system' is used when it is bound and
non-nil; otherwise `muse-xml-encoding-default' is used.  The chosen
coding system is translated by `muse-xml-transform-content-type'."
  (let ((coding (and (boundp 'buffer-file-coding-system)
                     buffer-file-coding-system)))
    (muse-xml-transform-content-type
     (or coding muse-xml-encoding-default))))
(defun muse-xml-markup-paragraph ()
  "Close and open <p> elements around the paragraph separator just matched.
Must be called with match data describing the separator.

Before the separator, insert \"</p>\" when the nearest preceding
paragraph tag is an opening one.  After the separator:
 - at end of buffer, only make sure the buffer ends with a newline;
 - before a tag, insert \"<p>\" only when the tag is one of the inline
   elements format, code, link, image, anchor or footnote;
 - before anything else, insert \"<p>\"."
  ;; The marker uses insertion-type t so that it stays after any text
  ;; inserted at the beginning of the match.
  (let ((end (copy-marker (match-end 0) t)))
    (goto-char (match-beginning 0))
    (when (save-excursion
            (save-match-data
              (and (re-search-backward "<\\(/?\\)p[ >]"
                                       nil t)
                   (not (string-equal (match-string 1) "/")))))
      (insert "</p>"))
    (goto-char end))
  (cond
   ((eobp)
    (unless (bolp)
      (insert "\n")))
   ((eq (char-after) ?\<)
    (when (looking-at (concat "<\\(format\\|code\\|link\\|image"
                              "\\|anchor\\|footnote\\)[ >]"))
      (insert "<p>")))
   ;; Plain text follows the separator: start a new paragraph.
   (t
    (insert "<p>"))))
(defun muse-xml-markup-anchor ()
  "Insert an anchor named by the first match group and return \"\".
The insertion is delegated to `muse-xml-insert-anchor'; the caller's
match data is preserved across that call."
  (let ((anchor-name (match-string 1)))
    (save-match-data
      (muse-xml-insert-anchor anchor-name)))
  "")
(defun muse-xml-insert-anchor (anchor)
  "Insert an empty <anchor> element whose id attribute is ANCHOR.
Characters listed in `muse-regexp-space' are skipped first.  If point
then sits on a simple opening tag, the anchor goes right after that
tag; otherwise it goes at point."
  (skip-chars-forward muse-regexp-space)
  (let ((on-open-tag (looking-at "<\\([^ />]+\\)>")))
    (if on-open-tag
        (goto-char (match-end 0))))
  (insert "<anchor id=\"" anchor "\" />"))
(defun muse-xml-markup-table ()
  "Replace the table row just matched with a one-row <table> fragment.
Match group 1 holds the row text.  The length of the first run of
bars in it selects the table part and the cell element:
one bar gives tbody/td, two give thead/th, three give tfoot/td."
  (let* ((row (match-string 1))
         (cells (progn
                  ;; Remove the source row before emitting its markup.
                  (delete-region (match-beginning 0) (match-end 0))
                  (split-string row "\\s-*|+\\s-*")))
         (bars (and (string-match "\\s-*\\(|+\\)\\s-*" row)
                    (length (match-string 1 row))))
         (section-tag (nth (1- bars) '("tbody" "thead" "tfoot")))
         (cell-tag (nth (1- bars) '("td" "th" "td"))))
    (insert "<table>\n"
            " <" section-tag ">\n"
            " <tr>\n")
    (mapc (lambda (cell)
            (insert " <" cell-tag ">" cell "</" cell-tag ">\n"))
          cells)
    (insert " </tr>\n"
            " </" section-tag ">\n"
            "</table>\n")))
(defun muse-xml-fixup-tables ()
  "Sort the parts of every published table in the current buffer.
For each line consisting of an opening <table> tag that is not
read-only, the thead, tbody and tfoot parts between that line and the
next line starting with </table> are reordered so that the head comes
first, then the foot, then the body.  If a table has no closing tag,
the parts up to the end of the buffer are sorted instead of
signalling an error."
  (goto-char (point-min))
  (while (re-search-forward "^<table[^>]*>$" nil t)
    (unless (get-text-property (point) 'read-only)
      (forward-line 1)
      (save-restriction
        (let ((beg (point)))
          ;; `narrow-to-region' rejects nil, so fall back to the end of
          ;; the buffer when the closing tag is missing.
          (narrow-to-region beg (if (re-search-forward "^</table>" nil t)
                                    (match-beginning 0)
                                  (point-max))))
        (goto-char (point-min))
        (let ((inhibit-read-only t))
          (sort-subr nil
                     ;; Move to the start of the next table part.
                     (function
                      (lambda ()
                        (if (re-search-forward
                             "^\\s-*<t\\(head\\|body\\|foot\\)>$" nil t)
                            (goto-char (match-beginning 0))
                          (goto-char (point-max)))))
                     ;; Move to the end of the current table part.
                     (function
                      (lambda ()
                        (if (re-search-forward
                             "^\\s-*</t\\(head\\|body\\|foot\\)>$" nil t)
                            (goto-char (match-end 0))
                          (goto-char (point-max)))))
                     ;; Sort key: head = 1, foot = 2, anything else = 3.
                     (function
                      (lambda ()
                        (looking-at "\\s-*<t\\(head\\|body\\|foot\\)>")
                        (cond ((string= (match-string 1) "head") 1)
                              ((string= (match-string 1) "foot") 2)
                              (t 3))))))))))
(defun muse-xml-finalize-buffer ()
  "Give the published buffer a concrete coding system.
When `buffer-file-coding-system' is `no-conversion' or
`undecided-unix', set it to `muse-xml-encoding-default' so that it
agrees with the default charset."
  (and (boundp 'buffer-file-coding-system)
       (memq buffer-file-coding-system '(no-conversion undecided-unix))
       (setq buffer-file-coding-system muse-xml-encoding-default)))
326 ;; Register the Muse XML Publisher
;; Define the "xml" style only when `muse-publishing-styles' has no
;; entry of that name yet.  Each keyword is given the quoted symbol of
;; an option or function defined in this file, except :browser, which
;; is given `find-file'.
(unless (assoc "xml" muse-publishing-styles)
  (muse-define-style "xml"
                     :suffix 'muse-xml-extension
                     :regexps 'muse-xml-markup-regexps
                     :functions 'muse-xml-markup-functions
                     :strings 'muse-xml-markup-strings
                     :specials 'muse-xml-markup-specials
                     ;; Table parts are sorted by this function.
                     :before-end 'muse-xml-fixup-tables
                     ;; Normalizes the buffer's coding system.
                     :after 'muse-xml-finalize-buffer
                     :header 'muse-xml-header
                     :footer 'muse-xml-footer
                     :browser 'find-file))
341 (provide 'muse-xml)
343 ;;; muse-xml.el ends here