If more than 1 blank separates list/table items, separate them.
[muse-el.git] / experimental / muse-xml.el
blob996a720d45288090d7f603b501fcd2cea3c7ed11
1 ;;; muse-xml.el --- publish XML files
3 ;; Copyright (C) 2005 Free Software Foundation, Inc.
5 ;; This file is free software; you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation; either version 2, or (at your option)
8 ;; any later version.
10 ;; This file is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with GNU Emacs; see the file COPYING. If not, write to the
17 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 ;; Boston, MA 02110-1301, USA.
20 ;;; Commentary:
22 ;; A Compact RelaxNG schema is available in `examples/muse.rnc'. The
23 ;; current maintainer has no idea how to make use of it, except that
24 ;; it might come in handy with nxml-mode, xml.el, xml-parse.el, or
25 ;; XSLT.
27 ;; This file is currently in experimental state. This means that the
28 ;; published output is subject to change. This also means that you
29 ;; still have the opportunity to correct braindead publishing
30 ;; choices by sending email to the list :^) .
32 ;;; Contributors:
34 ;; Peter K. Lee (saint AT corenova DOT com) made the initial
35 ;; implementation of planner-publish.el, which was heavily borrowed
36 ;; from.
38 ;; Brad Collins (brad AT chenla DOT org) provided a Compact RelaxNG
39 ;; schema.
41 ;;; Code:
43 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;; Muse XML Publishing
47 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
49 (require 'muse-publish)
50 (require 'muse-regexps)
(defgroup muse-xml nil
  "Customization options for publishing Muse pages as XML.
See `muse-xml' for more information."
  :group 'muse-publish)
(defcustom muse-xml-extension ".xml"
  "File extension given to published XML files by default."
  :group 'muse-xml
  :type 'string)
(defcustom muse-xml-header
  "<?xml version=\"1.0\" encoding=\"<lisp>
(muse-xml-encoding)</lisp>\"?>
<MUSE>
<pageinfo>
<title><lisp>(muse-publishing-directive \"title\")</lisp></title>
<author><lisp>(muse-publishing-directive \"author\")</lisp></author>
<maintainer><lisp>(muse-style-element :maintainer)</lisp></maintainer>
<pubdate><lisp>(muse-publishing-directive \"date\")</lisp></pubdate>
</pageinfo>
<!-- Page published by Emacs Muse begins here -->\n"
  "Text placed at the top of every published XML file.
The value is either the header text itself or the name of a file.
The default contains the XML declaration, the opening <MUSE>
element and a <pageinfo> block built from <lisp> forms."
  :group 'muse-xml
  :type 'string)
(defcustom muse-xml-footer "
<!-- Page published by Emacs Muse ends here -->
</MUSE>\n"
  "Footer used for publishing XML files.
This may be text or a filename.
The default closes the <MUSE> root element that the default value
of `muse-xml-header' opens, so that the published document is
well-formed."
  :type 'string
  :group 'muse-xml)
(defcustom muse-xml-markup-regexps
  `(;; Merge neighbouring tables: first those whose adjoining parts
    ;; are of the same kind (tbody, thead or tfoot) ...
    (10000 ,(concat " </t\\(body\\|head\\|foot\\)>\\s-*</table>"
                    "\\([" muse-regexp-blank "]*\n\\)\\{0,2\\}["
                    muse-regexp-blank "]*"
                    "<table[^>]*>\\s-*<t\\1>\n")
           0 "")
    ;; ... then any remaining back-to-back tables.
    (10100 ,(concat "</table>"
                    "\\([" muse-regexp-blank "]*\n\\)\\{0,2\\}["
                    muse-regexp-blank "]*"
                    "<table[^>]*>\n")
           0 "")

    ;; Merge neighbouring lists
    (10200 ,(concat "</list>"
                    "\\([" muse-regexp-blank "]*\n\\)\\{0,2\\}["
                    muse-regexp-blank "]*"
                    "<list[^>]*>\\s-*")
           0 "")

    ;; Paragraph boundaries: start of document, end of document, or
    ;; a run of blank lines, optionally next to a blockquote/format tag
    (10300 ,(concat "\\(\n</\\(blockquote\\|format\\)>\\)?\\(\\(\n\\(["
                    muse-regexp-blank
                    "]*\n\\)+\\)\\|\\`\\s-*\\|\\s-*\\'\\)"
                    "\\(<\\(blockquote\\|format type=\"center\"\\)>\n\\)?")
           0 muse-xml-markup-paragraph))
  "Markup rules applied when publishing a Muse page to XML.
The layout of each rule is described in `muse-publish-markup-regexps'."
  :group 'muse-xml
  :type '(repeat (choice
                  (list :tag "Markup rule"
                        integer
                        (choice regexp symbol)
                        integer
                        (choice string function symbol))
                  function)))
(defcustom muse-xml-markup-functions
  (list (cons 'anchor 'muse-xml-markup-anchor)
        (cons 'table 'muse-xml-markup-table))
  "Alist mapping a kind of markup to the function that publishes it.
The layout of this list is described in
`muse-publish-markup-functions'."
  :group 'muse-xml
  :type '(alist :key-type symbol :value-type function))
(defcustom muse-xml-markup-strings
  '((image-with-desc . "<image href=\"%s\">%s</image>")
    (image-link      . "<image href=\"%s\"></image>")
    (url-with-image  . "<link type=\"image\" href=\"%s\">%s</link>")
    (url-link        . "<link type=\"url\" href=\"%s\">%s</link>")
    (email-addr      . "<link type=\"email\" href=\"%s\">%s</link>")
    ;; A numeric character reference is used because `&mdash;' is
    ;; not one of the entities predefined by XML.
    (emdash          . " &#8212; ")
    ;; Empty elements must be self-closing in XML.
    (rule            . "<hr />")
    (fn-sep          . "<hr />\n")
    (enddots         . "....")
    (dots            . "...")
    (section         . "<section level=\"1\"><title>")
    (section-end     . "</title>")
    (subsection      . "<section level=\"2\"><title>")
    (subsection-end  . "</title>")
    (subsubsection   . "<section level=\"3\"><title>")
    (subsubsection-end . "</title>")
    (section-other   . "<section level=\"%s\"><title>")
    (section-other-end . "</title>")
    (section-close   . "</section>")
    (footnote        . "<footnote>")
    (footnote-end    . "</footnote>")
    (begin-underline . "<format type=\"underline\">")
    (end-underline   . "</format>")
    (begin-literal   . "<code>")
    (end-literal     . "</code>")
    (begin-emph      . "<format type=\"emphasis\" level=\"1\">")
    (end-emph        . "</format>")
    (begin-more-emph . "<format type=\"emphasis\" level=\"2\">")
    (end-more-emph   . "</format>")
    (begin-most-emph . "<format type=\"emphasis\" level=\"3\">")
    (end-most-emph   . "</format>")
    (begin-verse     . "<verse>\n")
    (begin-verse-line . "<line>")
    (end-verse-line  . "</line>")
    (empty-verse-line . "<line />")
    (begin-last-stanza-line . "<line>")
    (end-last-stanza-line . "</line>")
    (end-verse       . "</verse>")
    (begin-example   . "<example>")
    (end-example     . "</example>")
    (begin-center    . "<format type=\"center\">\n")
    (end-center      . "\n</format>")
    (begin-quote     . "<blockquote>\n")
    (end-quote       . "\n</blockquote>")
    (begin-uli       . "<list type=\"unordered\">\n<item>")
    (end-uli         . "</item>\n</list>")
    (begin-oli       . "<list type=\"ordered\">\n<item>")
    (end-oli         . "</item>\n</list>")
    (begin-ddt       . "<list type=\"definition\">\n<item><term>")
    (start-dde       . "</term>\n<definition>")
    (end-ddt         . "</definition>\n</item>\n</list>"))
  "Strings used for marking up text.
These cover the most basic kinds of markup, the handling of which
differs little between the various styles.

Every string must be well-formed XML on its own or together with
its matching begin/end counterpart: empty elements are written in
self-closing form and only numeric character references or the
entities predefined by XML are used."
  :type '(alist :key-type symbol :value-type string)
  :group 'muse-xml)
(defcustom muse-xml-markup-specials
  '((?\" . "&quot;")
    (?<  . "&lt;")
    (?>  . "&gt;")
    (?&  . "&amp;"))
  "Alist of characters and the text that must replace them in XML."
  :group 'muse-xml
  :type '(alist :key-type character :value-type string))
(defcustom muse-xml-encoding-default 'utf-8
  "Emacs buffer encoding used by default for published files.
It is used when no special characters are found."
  :group 'muse-xml
  :type 'symbol)
(defcustom muse-xml-charset-default "utf-8"
  "XML charset used when `muse-xml-encoding-map' has no translation."
  :group 'muse-xml
  :type 'string)
(defcustom muse-xml-encoding-map
  '((iso-8859-1        . "iso-8859-1")
    (iso-2022-jp       . "iso-2022-jp")
    (utf-8             . "utf-8")
    (japanese-iso-8bit . "euc-jp")
    (chinese-big5      . "big5")
    (mule-utf-8        . "utf-8")
    (chinese-iso-8bit  . "gb2312")
    (chinese-gbk       . "gbk"))
  "Alist that maps Emacs coding systems to the matching XML charsets.
Keys are base names of coding systems (i.e. without the -unix)."
  :group 'muse-xml
  :type '(alist :key-type coding-system :value-type string))
(defun muse-xml-transform-content-type (content-type)
  "Return the XML charset name for the Emacs coding system CONTENT-TYPE.
The base of CONTENT-TYPE is looked up in `muse-xml-encoding-map'.
When there is no entry for it, or when `coding-system-base' is
not available, `muse-xml-charset-default' is returned."
  (let ((entry (when (fboundp 'coding-system-base)
                 (assoc (coding-system-base content-type)
                        muse-xml-encoding-map))))
    (cond (entry (cdr entry))
          (t muse-xml-charset-default))))
(defun muse-xml-encoding ()
  "Return the XML charset name to declare for the current buffer.
The value of `buffer-file-coding-system' is used when that
variable is bound and non-nil, `muse-xml-encoding-default'
otherwise; the result is translated with
`muse-xml-transform-content-type'."
  (let ((coding (if (boundp 'buffer-file-coding-system)
                    buffer-file-coding-system
                  nil)))
    (muse-xml-transform-content-type
     (if coding
         coding
       muse-xml-encoding-default))))
(defun muse-xml-markup-paragraph ()
  "Close the paragraph before the current match and open the next one.
The match data must describe a paragraph separator.  A \"</p>\"
is inserted at the start of the match when the nearest preceding
paragraph tag is an opening one.  After the match, \"<p>\" is
inserted unless point is at the end of the buffer (where only a
missing final newline is added) or at a tag that is not one of
format, code, link, image, anchor or footnote."
  ;; The marker advances past text inserted at its position, so it
  ;; still designates the end of the match after "</p>" is added.
  (let ((end (copy-marker (match-end 0) t)))
    (goto-char (match-beginning 0))
    (when (save-excursion
            (save-match-data
              ;; Is the closest paragraph tag before us still open?
              (and (re-search-backward "<\\(/?\\)p[ >]"
                                       nil t)
                   (not (string-equal (match-string 1) "/")))))
      (insert "</p>"))
    (goto-char end))
  (cond
   ((eobp)
    (unless (bolp)
      (insert "\n")))
   ((eq (char-after) ?\<)
    ;; Only inline-level tags start a new paragraph.
    (when (looking-at (concat "<\\(format\\|code\\|link\\|image"
                              "\\|anchor\\|footnote\\)[ >]"))
      (insert "<p>")))
   (t
    (insert "<p>"))))
(defun muse-xml-markup-anchor ()
  "Insert an anchor named by group 1 of the current match.
The match data is preserved.  Return the empty string."
  (let ((anchor-name (match-string 1)))
    (save-match-data
      (muse-xml-insert-anchor anchor-name)))
  "")
(defun muse-xml-insert-anchor (anchor)
  "Insert an <anchor> element whose id is ANCHOR.
Characters in `muse-regexp-space' are skipped first.  If point is
then at an opening tag without attributes, the anchor goes right
after that tag; otherwise it is inserted at point."
  (skip-chars-forward muse-regexp-space)
  (if (looking-at "<\\([^ />]+\\)>")
      (goto-char (match-end 0)))
  (insert "<anchor id=\""
          anchor
          "\" />"))
(defun muse-xml-markup-table ()
  "Replace the matched table row with a single-row XML table.
Group 1 of the current match holds the row text, whose fields are
separated by runs of `|'.  The whole match is deleted and the
table is inserted in its place.

The length of the first run of bars selects the table part: one
bar gives a <tbody> row of <td> cells, two bars a <thead> row of
<th> cells and three bars a <tfoot> row of <td> cells.  A row
without any separator, or with a longer run of bars, is published
as a <tbody> row instead of signaling an error."
  (let* ((str (prog1
                  (match-string 1)
                (delete-region (match-beginning 0) (match-end 0))))
         (fields (split-string str "\\s-*|+\\s-*"))
         ;; Fall back to 1 so that `type' is always a number, even
         ;; when the row contains no separator at all.
         (type (if (string-match "\\s-*\\(|+\\)\\s-*" str)
                   (length (match-string 1 str))
                 1))
         ;; Anything that is not a header or footer row is a body row.
         (part (cond ((= type 2) "thead")
                     ((= type 3) "tfoot")
                     (t "tbody")))
         (col (if (= type 2) "th" "td")))
    (insert "<table>\n"
            " <" part ">\n"
            " <tr>\n")
    (dolist (field fields)
      (insert " <" col ">" field "</" col ">\n"))
    (insert " </tr>\n"
            " </" part ">\n"
            "</table>\n")))
(defun muse-xml-fixup-tables ()
  "Sort the parts of every table in the buffer.
Each table starts at a line consisting of a <table> tag and ends
at the next line that begins with </table>.  Within a table, the
parts are reordered so that <thead> comes first, then <tfoot>,
then <tbody>.  Tables whose opening tag is followed by read-only
text, and tables without a closing tag, are left untouched."
  (goto-char (point-min))
  (while (re-search-forward "^<table[^>]*>$" nil t)
    (unless (get-text-property (point) 'read-only)
      (forward-line 1)
      (let ((beg (point))
            (end (and (re-search-forward "^</table>" nil t)
                      (match-beginning 0))))
        ;; Without a closing tag there is no region to sort.
        (when end
          (save-restriction
            (narrow-to-region beg end)
            (goto-char (point-min))
            (let ((inhibit-read-only t))
              (sort-subr nil
                         ;; Move to the start of the next table part.
                         (function
                          (lambda ()
                            (if (re-search-forward
                                 "^\\s-*<t\\(head\\|body\\|foot\\)>$" nil t)
                                (goto-char (match-beginning 0))
                              (goto-char (point-max)))))
                         ;; Move to the end of the current table part.
                         (function
                          (lambda ()
                            (if (re-search-forward
                                 "^\\s-*</t\\(head\\|body\\|foot\\)>$" nil t)
                                (goto-char (match-end 0))
                              (goto-char (point-max)))))
                         ;; Sort key: head, then foot, then body.
                         (function
                          (lambda ()
                            (looking-at "\\s-*<t\\(head\\|body\\|foot\\)>")
                            (cond ((string= (match-string 1) "head") 1)
                                  ((string= (match-string 1) "foot") 2)
                                  (t 3))))))))))))
(defun muse-xml-finalize-buffer ()
  "Give the buffer a definite coding system.
When `buffer-file-coding-system' is bound and is either
`no-conversion' or `undecided-unix', set it to
`muse-xml-encoding-default' so that it agrees with the default
charset."
  (when (and (boundp 'buffer-file-coding-system)
             (memq buffer-file-coding-system
                   '(no-conversion undecided-unix)))
    (setq buffer-file-coding-system muse-xml-encoding-default)))
;; Define the "xml" publishing style, unless a style of that name
;; is already present in `muse-publishing-styles'.

(if (assoc "xml" muse-publishing-styles)
    nil
  (muse-define-style "xml"
                     :suffix     'muse-xml-extension
                     :regexps    'muse-xml-markup-regexps
                     :functions  'muse-xml-markup-functions
                     :strings    'muse-xml-markup-strings
                     :specials   'muse-xml-markup-specials
                     :before-end 'muse-xml-fixup-tables
                     :after      'muse-xml-finalize-buffer
                     :header     'muse-xml-header
                     :footer     'muse-xml-footer
                     :browser    'find-file))
352 (provide 'muse-xml)
354 ;;; muse-xml.el ends here