Output encoding support, using Babel
[cxml.git] / xml / unparse.lisp
blobd5bc8ed49d7e1b009f7dbcc60a54257105a59801
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Unparse XML
4 ;;; Title: (including support for canonic XML according to J.Clark)
5 ;;; Created: 1999-09-09
6 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
7 ;;; Author: David Lichteblau <david@lichteblau.com>
8 ;;; License: Lisp-LGPL (See file COPYING for details).
9 ;;; ---------------------------------------------------------------------------
10 ;;; (c) copyright 1999 by Gilbert Baumann
11 ;;; (c) copyright 2004 by knowledgeTools Int. GmbH
12 ;;; (c) copyright 2004 by David Lichteblau (for headcraft.de)
13 ;;; (c) copyright 2005-2008 by David Lichteblau
15 ;;; This library is free software; you can redistribute it and/or
16 ;;; modify it under the terms of the GNU Library General Public
17 ;;; License as published by the Free Software Foundation; either
18 ;;; version 2 of the License, or (at your option) any later version.
19 ;;;
20 ;;; This library is distributed in the hope that it will be useful,
21 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 ;;; Library General Public License for more details.
24 ;;;
25 ;;; You should have received a copy of the GNU Library General Public
26 ;;; License along with this library; if not, write to the
27 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
28 ;;; Boston, MA 02111-1307 USA.
30 (in-package :cxml)
32 ;;
33 ;; | Canonical XML
34 ;; | =============
35 ;; |
36 ;; | This document defines a subset of XML called canonical XML. The
37 ;; | intended use of canonical XML is in testing XML processors, as a
38 ;; | representation of the result of parsing an XML document.
39 ;; |
40 ;; | Every well-formed XML document has a unique structurally equivalent
41 ;; | canonical XML document. Two structurally equivalent XML documents have
42 ;; | a byte-for-byte identical canonical XML document. Canonicalizing an
43 ;; | XML document requires only information that an XML processor is
44 ;; | required to make available to an application.
45 ;; |
46 ;; | A canonical XML document conforms to the following grammar:
47 ;; |
48 ;; | CanonXML ::= Pi* element Pi*
49 ;; | element ::= Stag (Datachar | Pi | element)* Etag
50 ;; | Stag ::= '<' Name Atts '>'
51 ;; | Etag ::= '</' Name '>'
52 ;; | Pi ::= '<?' Name ' ' (((Char - S) Char*)? - (Char* '?>' Char*)) '?>'
53 ;; | Atts ::= (' ' Name '=' '"' Datachar* '"')*
54 ;; | Datachar ::= '&amp;' | '&lt;' | '&gt;' | '&quot;'
55 ;; | | '&#9;'| '&#10;'| '&#13;'
56 ;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD))
57 ;; | Name ::= (see XML spec)
58 ;; | Char ::= (see XML spec)
59 ;; | S ::= (see XML spec)
60 ;; |
61 ;; | Attributes are in lexicographical order (in Unicode bit order).
62 ;; |
63 ;; | A canonical XML document is encoded in UTF-8.
64 ;; |
65 ;; | Ignorable white space is considered significant and is treated
66 ;; | equivalently to data.
68 ;; -- James Clark (jjc@jclark.com)
71 ;;;; SINK: an xml output sink
73 (defclass sink (sax:content-handler)
74 ((ystream :initarg :ystream :accessor sink-ystream)
75 (width :initform 79 :initarg :width :accessor width)
76 (canonical :initform nil :initarg :canonical :accessor canonical)
77 (indentation :initform nil :initarg :indentation :accessor indentation)
78 (current-indentation :initform 0 :accessor current-indentation)
79 (notations :initform (make-buffer :element-type t) :accessor notations)
80 (name-for-dtd :accessor name-for-dtd)
81 (previous-notation :initform nil :accessor previous-notation)
82 (have-doctype :initform nil :accessor have-doctype)
83 (have-internal-subset :initform nil :accessor have-internal-subset)
84 (stack :initform nil :accessor stack)
85 (sink-omit-xml-declaration-p :initform nil
86 :initarg :omit-xml-declaration-p
87 :accessor sink-omit-xml-declaration-p)
88 (encoding :initarg :encoding :reader sink-encoding)))
90 #-rune-is-character
91 (defmethod hax:%want-strings-p ((handler sink))
92 nil)
94 (defmethod initialize-instance :after ((instance sink) &key)
95 (when (eq (canonical instance) t)
96 (setf (canonical instance) 1))
97 (unless (member (canonical instance) '(nil 1 2))
98 (error "Invalid canonical form: ~A" (canonical instance)))
99 (when (and (canonical instance) (indentation instance))
100 (error "Cannot indent XML in canonical mode"))
101 (when (and (canonical instance)
102 (not (eq (ystream-encoding (sink-ystream instance)) :utf-8)))
103 (error "Cannot use non-UTF-8 encoding in canonical mode"))
104 (when (let ((encoding (ystream-encoding (sink-ystream instance))))
105 (and (not (symbolp encoding))
106 (eq (babel-encodings:enc-name encoding) :utf-16)))
107 (sink-write-rune #/U+FEFF instance)))
109 (defun make-buffer (&key (element-type '(unsigned-byte 8)))
110 (make-array 1
111 :element-type element-type
112 :adjustable t
113 :fill-pointer 0))
115 (defun find-output-encoding (name)
116 (when (stringp name)
117 (setf name (find-symbol (string-upcase name) :keyword)))
118 (cond
119 ((null name)
120 (warn "Unknown encoding ~A, falling back to UTF-8" name)
121 :utf-8)
122 ((find name '(:utf-8 :utf_8 :utf8))
123 :utf-8)
124 #-rune-is-character
126 (warn "Unknown encoding ~A, falling back to UTF-8" name)
127 :utf-8)
128 #+rune-is-character
130 (handler-case
131 (babel-encodings:get-character-encoding name)
132 (error ()
133 (warn "Unknown encoding ~A, falling back to UTF-8" name)
134 :utf-8)))))
136 ;; bisschen unschoen hier die ganze api zu duplizieren, aber die
137 ;; ystreams sind noch undokumentiert
138 (macrolet ((define-maker (make-sink make-ystream &rest args)
139 `(defun ,make-sink (,@args &rest initargs
140 &key encoding &allow-other-keys)
141 (let* ((encoding (or encoding "UTF-8"))
142 (ystream (,make-ystream ,@args)))
143 (setf (ystream-encoding ystream)
144 (find-output-encoding encoding))
145 (apply #'make-instance
146 'sink
147 :ystream ystream
148 :encoding encoding
149 initargs)))))
150 (define-maker make-octet-vector-sink make-octet-vector-ystream)
151 (define-maker make-octet-stream-sink make-octet-stream-ystream stream)
152 (define-maker make-rod-sink make-rod-ystream)
154 #+rune-is-character
155 (define-maker make-character-stream-sink make-character-stream-ystream stream)
157 #-rune-is-character
158 (define-maker make-string-sink/utf8 make-string-ystream/utf8)
160 #-rune-is-character
161 (define-maker make-character-stream-sink/utf8
162 make-character-stream-ystream/utf8
163 stream))
165 #+rune-is-character
166 (defun make-string-sink (&rest args) (apply #'make-rod-sink args))
169 (defmethod sax:end-document ((sink sink))
170 (close-ystream (sink-ystream sink)))
173 ;;;; doctype and notations
175 (defmethod sax:start-document ((sink sink))
176 (unless (or (canonical sink)
177 (sink-omit-xml-declaration-p sink))
178 (sink-write-rod #"<?xml version=\"1.0\" encoding=\"" sink)
179 (sink-write-rod (rod (sink-encoding sink)) sink)
180 (sink-write-rod #"\"?>" sink)
181 (sink-write-rune #/U+000A sink)))
183 (defmethod sax:start-dtd ((sink sink) name public-id system-id)
184 (setf (name-for-dtd sink) name)
185 (unless (canonical sink)
186 (ensure-doctype sink public-id system-id)))
188 (defun ensure-doctype (sink &optional public-id system-id)
189 (unless (have-doctype sink)
190 (setf (have-doctype sink) t)
191 (sink-write-rod #"<!DOCTYPE " sink)
192 (sink-write-rod (name-for-dtd sink) sink)
193 (cond
194 ((not (zerop (length public-id)))
195 (sink-write-rod #" PUBLIC \"" sink)
196 (sink-write-escapable-rod public-id sink)
197 (sink-write-rod #"\" \"" sink)
198 (sink-write-escapable-rod system-id sink)
199 (sink-write-rod #"\"" sink))
200 ((not (zerop (length system-id)))
201 (sink-write-rod #" SYSTEM \"" sink)
202 (sink-write-escapable-rod system-id sink)
203 (sink-write-rod #"\"" sink)))))
205 (defmethod sax:start-internal-subset ((sink sink))
206 (when (have-internal-subset sink)
207 (error "duplicate internal subset"))
208 (setf (have-internal-subset sink) t)
209 (ensure-doctype sink)
210 (sink-write-rod #" [" sink)
211 (sink-write-rune #/U+000A sink))
213 (defmethod sax:end-internal-subset ((sink sink))
214 (ensure-doctype sink)
215 (sink-write-rod #"]" sink))
217 (defmethod sax:unparsed-internal-subset ((sink sink) str)
218 (when (have-internal-subset sink)
219 (error "duplicate internal subset"))
220 (setf (have-internal-subset sink) t)
221 (ensure-doctype sink)
222 (sink-write-rod #" [" sink)
223 (sink-write-rune #/U+000A sink)
224 (sink-write-rod str sink)
225 (sink-write-rod #"]" sink))
227 ;; for the benefit of the XML test suite, prefer ' over "
228 (defun write-quoted-rod (x sink)
229 (let ((q (if (find #/' x) #/" #/'
230 ;; '" (thanks you Emacs indentation, the if ends here)
232 (sink-write-rune q sink)
233 (sink-write-rod x sink)
234 (sink-write-rune q sink)))
236 (defmethod sax:notation-declaration ((sink sink) name public-id system-id)
237 (let ((prev (previous-notation sink)))
238 (when (and (and (canonical sink) (>= (canonical sink) 2))
239 prev
240 (not (rod< prev name)))
241 (error "misordered notations; cannot unparse canonically"))
242 (setf (previous-notation sink) name))
243 (sink-write-rod #"<!NOTATION " sink)
244 (sink-write-rod name sink)
245 (cond
246 ((zerop (length public-id))
247 (sink-write-rod #" SYSTEM " sink)
248 (write-quoted-rod system-id sink))
249 ((zerop (length system-id))
250 (sink-write-rod #" PUBLIC " sink)
251 (write-quoted-rod public-id sink))
253 (sink-write-rod #" PUBLIC " sink)
254 (write-quoted-rod public-id sink)
255 (sink-write-rod #" " sink)
256 (write-quoted-rod system-id sink)))
257 (sink-write-rune #/> sink)
258 (sink-write-rune #/U+000A sink))
260 (defmethod sax:unparsed-entity-declaration
261 ((sink sink) name public-id system-id notation-name)
262 (unless (and (canonical sink) (< (canonical sink) 3))
263 (sink-write-rod #"<!ENTITY " sink)
264 (sink-write-rod name sink)
265 (cond
266 ((zerop (length public-id))
267 (sink-write-rod #" SYSTEM " sink)
268 (write-quoted-rod system-id sink))
269 ((zerop (length system-id))
270 (sink-write-rod #" PUBLIC " sink)
271 (write-quoted-rod public-id sink))
273 (sink-write-rod #" PUBLIC " sink)
274 (write-quoted-rod public-id sink)
275 (sink-write-rod #" " sink)
276 (write-quoted-rod system-id sink)))
277 (sink-write-rod #" NDATA " sink)
278 (sink-write-rod notation-name sink)
279 (sink-write-rune #/> sink)
280 (sink-write-rune #/U+000A sink)))
282 (defmethod sax:external-entity-declaration
283 ((sink sink) kind name public-id system-id)
284 (when (canonical sink)
285 (error "cannot serialize parsed entities in canonical mode"))
286 (sink-write-rod #"<!ENTITY " sink)
287 (when (eq kind :parameter)
288 (sink-write-rod #" % " sink))
289 (sink-write-rod name sink)
290 (cond
291 ((zerop (length public-id))
292 (sink-write-rod #" SYSTEM " sink)
293 (write-quoted-rod system-id sink))
294 ((zerop (length system-id))
295 (sink-write-rod #" PUBLIC " sink)
296 (write-quoted-rod public-id sink))
298 (sink-write-rod #" PUBLIC " sink)
299 (write-quoted-rod public-id sink)
300 (sink-write-rod #" " sink)
301 (write-quoted-rod system-id sink)))
302 (sink-write-rune #/> sink)
303 (sink-write-rune #/U+000A sink))
305 (defmethod sax:internal-entity-declaration ((sink sink) kind name value)
306 (when (canonical sink)
307 (error "cannot serialize parsed entities in canonical mode"))
308 (sink-write-rod #"<!ENTITY " sink)
309 (when (eq kind :parameter)
310 (sink-write-rod #" % " sink))
311 (sink-write-rod name sink)
312 (sink-write-rune #/U+0020 sink)
313 (sink-write-rune #/\" sink)
314 (sink-write-escapable-rod/dtd value sink)
315 (sink-write-rune #/\" sink)
316 (sink-write-rune #/> sink)
317 (sink-write-rune #/U+000A sink))
319 (defmethod sax:element-declaration ((sink sink) name model)
320 (when (canonical sink)
321 (error "cannot serialize element type declarations in canonical mode"))
322 (sink-write-rod #"<!ELEMENT " sink)
323 (sink-write-rod name sink)
324 (sink-write-rune #/U+0020 sink)
325 (labels ((walk (m)
326 (cond
327 ((eq m :EMPTY)
328 (sink-write-rod "EMPTY" sink))
329 ((eq m :PCDATA)
330 (sink-write-rod "#PCDATA" sink))
331 ((eq m :ANY)
332 (sink-write-rod "ANY" sink))
333 ((atom m)
334 (sink-write-escapable-rod m sink))
336 (ecase (car m)
337 (and
338 (sink-write-rune #/\( sink)
339 (loop for (n . rest) on (cdr m) do
340 (walk n)
341 (when rest
342 (sink-write-rune #\, sink)))
343 (sink-write-rune #/\) sink))
345 (sink-write-rune #/\( sink)
346 (loop for (n . rest) on (cdr m) do
347 (walk n)
348 (when rest
349 (sink-write-rune #\| sink)))
350 (sink-write-rune #/\) sink))
352 (walk (second m))
353 (sink-write-rune #/* sink))
355 (walk (second m))
356 (sink-write-rune #/+ sink))
358 (walk (second m))
359 (sink-write-rune #/? sink)))))))
360 (walk model))
361 (sink-write-rune #/> sink)
362 (sink-write-rune #/U+000A sink))
364 (defmethod sax:attribute-declaration ((sink sink) ename aname type default)
365 (when (canonical sink)
366 (error "cannot serialize attribute type declarations in canonical mode"))
367 (sink-write-rod #"<!ATTLIST " sink)
368 (sink-write-rod ename sink)
369 (sink-write-rune #/U+0020 sink)
370 (sink-write-rod aname sink)
371 (sink-write-rune #/U+0020 sink)
372 (cond
373 ((atom type)
374 (sink-write-rod (rod (string-upcase (symbol-name type))) sink))
376 (when (eq :NOTATION (car type))
377 (sink-write-rod #"NOTATION " sink))
378 (sink-write-rune #/\( sink)
379 (loop for (n . rest) on (cdr type) do
380 (sink-write-rod n sink)
381 (when rest
382 (sink-write-rune #\| sink)))
383 (sink-write-rune #/\) sink)))
384 (sink-write-rune #/U+0020 sink)
385 (cond
386 ((atom default)
387 (sink-write-rune #/# sink)
388 (sink-write-rod (rod (string-upcase (symbol-name default))) sink))
390 (when (eq :FIXED (car default))
391 (sink-write-rod #"#FIXED " sink))
392 (sink-write-rune #/\" sink)
393 (sink-write-escapable-rod (second default) sink)
394 (sink-write-rune #/\" sink)))
395 (sink-write-rune #/> sink)
396 (sink-write-rune #/U+000A sink))
398 (defmethod sax:end-dtd ((sink sink))
399 (when (have-doctype sink)
400 (sink-write-rod #">" sink)
401 (sink-write-rune #/U+000A sink)))
404 ;;;; elements
406 (defstruct (tag (:constructor make-tag (name)))
407 name
408 (n-children 0)
409 (have-gt nil))
411 (defun sink-fresh-line (sink)
412 (unless (zerop (ystream-column (sink-ystream sink)))
413 (sink-write-rune #/U+000A sink) ;newline
414 (indent sink)))
416 (defun maybe-close-tag (sink)
417 (let ((tag (car (stack sink))))
418 (when (and (tag-p tag) (not (tag-have-gt tag)))
419 (setf (tag-have-gt tag) t)
420 (sink-write-rune #/> sink))))
422 (defmethod sax:start-element
423 ((sink sink) namespace-uri local-name qname attributes)
424 (declare (ignore namespace-uri local-name))
425 (maybe-close-tag sink)
426 (when (stack sink)
427 (incf (tag-n-children (first (stack sink)))))
428 (push (make-tag qname) (stack sink))
429 (when (indentation sink)
430 (sink-fresh-line sink)
431 (start-indentation-block sink))
432 (sink-write-rune #/< sink)
433 (sink-write-rod qname sink)
434 (dolist (a (if (canonical sink)
435 (sort (copy-list attributes)
436 #'rod<
437 :key #'sax:attribute-qname)
438 attributes))
439 (sink-write-rune #/space sink)
440 (sink-write-rod (sax:attribute-qname a) sink)
441 (sink-write-rune #/= sink)
442 (sink-write-rune #/\" sink)
443 (if (canonical sink)
444 (sink-write-escapable-rod/canonical (sax:attribute-value a) sink)
445 (sink-write-escapable-rod/attribute (sax:attribute-value a) sink))
446 (sink-write-rune #/\" sink))
447 (when (canonical sink)
448 (maybe-close-tag sink)))
450 (defmethod sax:end-element
451 ((sink sink) namespace-uri local-name qname)
452 (declare (ignore namespace-uri local-name))
453 (let ((tag (pop (stack sink))))
454 (unless (tag-p tag)
455 (error "output does not nest: not in an element"))
456 (unless (rod= (tag-name tag) qname)
457 (error "output does not nest: expected ~A but got ~A"
458 (rod qname) (rod (tag-name tag))))
459 (when (indentation sink)
460 (end-indentation-block sink)
461 (unless (zerop (tag-n-children tag))
462 (sink-fresh-line sink)))
463 (cond
464 ((tag-have-gt tag)
465 (sink-write-rod '#.(string-rod "</") sink)
466 (sink-write-rod qname sink)
467 (sink-write-rod '#.(string-rod ">") sink))
469 (sink-write-rod #"/>" sink)))))
471 (defmethod sax:processing-instruction ((sink sink) target data)
472 (maybe-close-tag sink)
473 (unless (rod-equal target '#.(string-rod "xml"))
474 (sink-write-rod '#.(string-rod "<?") sink)
475 (sink-write-rod target sink)
476 (cond
477 ((plusp (length data))
478 (sink-write-rune #/space sink)
479 (sink-write-rod data sink))
480 ((canonical sink)
481 (sink-write-rune #/space sink)))
482 (sink-write-rod '#.(string-rod "?>") sink)))
484 (defmethod sax:start-cdata ((sink sink))
485 (maybe-close-tag sink)
486 (push :cdata (stack sink)))
488 (defmethod sax:characters ((sink sink) data)
489 (maybe-close-tag sink)
490 (cond
491 ((and (eq (car (stack sink)) :cdata)
492 (not (canonical sink))
493 (not (search #"]]" data)))
494 (when (indentation sink)
495 (sink-fresh-line sink))
496 (sink-write-rod #"<![CDATA[" sink)
497 ;; XXX signal error if body is unprintable?
498 ;; zzz no, in that case, split into multiple CDATA sections
499 (map nil (lambda (c) (sink-write-rune c sink)) data)
500 (sink-write-rod #"]]>" sink))
502 (if (indentation sink)
503 (unparse-indented-text data sink)
504 (if (canonical sink)
505 (sink-write-escapable-rod/canonical data sink)
506 (sink-write-escapable-rod data sink))))))
508 (defmethod sax:unescaped ((sink sink) data)
509 (maybe-close-tag sink)
510 (sink-write-rod data sink))
512 (defmethod sax:comment ((sink sink) data)
513 (maybe-close-tag sink)
514 (unless (canonical sink)
515 ;; XXX signal error if body is unprintable?
516 (sink-write-rod #"<!--" sink)
517 (map nil (lambda (c) (sink-write-rune c sink)) data)
518 (sink-write-rod #"-->" sink)))
520 (defmethod sax:end-cdata ((sink sink))
521 (unless (eq (pop (stack sink)) :cdata)
522 (error "output does not nest: not in a cdata section")))
524 (defun indent (sink)
525 (dotimes (x (current-indentation sink))
526 (sink-write-rune #/U+0020 sink)))
528 (defun start-indentation-block (sink)
529 (incf (current-indentation sink) (indentation sink)))
531 (defun end-indentation-block (sink)
532 (decf (current-indentation sink) (indentation sink)))
534 (defun unparse-indented-text (data sink)
535 (flet ((whitespacep (x)
536 (or (rune= x #/U+000A) (rune= x #/U+0020))))
537 (let* ((n (length data))
538 (pos (position-if-not #'whitespacep data))
539 (need-whitespace-p nil))
540 (cond
541 ((zerop n))
542 (pos
543 (sink-fresh-line sink)
544 (while (< pos n)
545 (let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n))
546 (next (or (position-if-not #'whitespacep data :start w) n)))
547 (when need-whitespace-p
548 (if (< (+ (ystream-column (sink-ystream sink)) w (- pos))
549 (width sink))
550 (sink-write-rune #/U+0020 sink)
551 (sink-fresh-line sink)))
552 (sink-write-escapable-rod data sink :start pos :end w)
553 (setf need-whitespace-p (< w n))
554 (setf pos next))))
556 (sink-write-rune #/U+0020 sink))))))
558 (defun sink-write-escapable-rod (rod sink &key (start 0) (end (length rod)))
560 ;; OPTIMIZE ME
562 (let ((y (sink-ystream sink)))
563 (loop
564 for i from start below end
565 for c = (rune rod i)
567 (case c
568 (#/& (ystream-write-escapable-rod #.(string-rod "&amp;") y))
569 (#/< (ystream-write-escapable-rod #.(string-rod "&lt;") y))
570 ;; there's no need to escape > per se, but we're supposed to
571 ;; escape -->, which is harder to check for
572 (#/> (ystream-write-escapable-rod #.(string-rod "&gt;") y))
573 (#/U+000D (ystream-write-escapable-rod #.(string-rod "&#13;") y))
574 (t (ystream-write-escapable-rune c y))))))
576 (defun sink-write-escapable-rod/attribute
577 (rod sink &key (start 0) (end (length rod)))
579 ;; OPTIMIZE ME
581 (let ((y (sink-ystream sink)))
582 (loop
583 for i from start below end
584 for c = (rune rod i)
586 (case c
587 (#/& (ystream-write-escapable-rod #.(string-rod "&amp;") y))
588 (#/< (ystream-write-escapable-rod #.(string-rod "&lt;") y))
589 ;; there's no need to escape > per se, but we're supposed to
590 ;; escape -->, which is harder to check for
591 (#/> (ystream-write-escapable-rod #.(string-rod "&gt;") y))
592 (#/\" (ystream-write-escapable-rod #.(string-rod "&quot;") y))
593 (#/U+0009 (ystream-write-escapable-rod #.(string-rod "&#9;") y))
594 (#/U+000A (ystream-write-escapable-rod #.(string-rod "&#10;") y))
595 (#/U+000D (ystream-write-escapable-rod #.(string-rod "&#13;") y))
596 (t (ystream-write-escapable-rune c y))))))
598 (defun sink-write-escapable-rod/canonical
599 (rod sink &key (start 0) (end (length rod)))
601 ;; OPTIMIZE ME
603 (let ((y (sink-ystream sink)))
604 (loop
605 for i from start below end
606 for c = (rune rod i)
608 (case c
609 (#/& (ystream-write-escapable-rod #.(string-rod "&amp;") y))
610 (#/< (ystream-write-escapable-rod #.(string-rod "&lt;") y))
611 (#/> (ystream-write-escapable-rod #.(string-rod "&gt;") y))
612 (#/\" (ystream-write-escapable-rod #.(string-rod "&quot;") y))
613 (#/U+0009 (ystream-write-escapable-rod #.(string-rod "&#9;") y))
614 (#/U+000A (ystream-write-escapable-rod #.(string-rod "&#10;") y))
615 (#/U+000D (ystream-write-escapable-rod #.(string-rod "&#13;") y))
616 (t (ystream-write-escapable-rune c y))))))
618 (defun sink-write-escapable-rod/dtd
619 (rod sink &key (start 0) (end (length rod)))
620 (let ((y (sink-ystream sink)))
621 (loop
622 for i from start below end
623 for c = (rune rod i)
625 (case c
626 (#/% (ystream-write-escapable-rod #.(string-rod "&#37;") y))
627 (#/& (ystream-write-escapable-rod #.(string-rod "&amp;") y))
628 (#/< (ystream-write-escapable-rod #.(string-rod "&lt;") y))
629 (#/> (ystream-write-escapable-rod #.(string-rod "&gt;") y))
630 (#/\" (ystream-write-escapable-rod #.(string-rod "&quot;") y))
631 (#/U+0009 (ystream-write-escapable-rod #.(string-rod "&#9;") y))
632 (#/U+000A (ystream-write-escapable-rod #.(string-rod "&#10;") y))
633 (#/U+000D (ystream-write-escapable-rod #.(string-rod "&#13;") y))
634 (t (ystream-write-escapable-rune c y))))))
636 (defun sink-write-rune (c sink)
637 (ystream-write-rune c (sink-ystream sink)))
639 (defun sink-write-rod (r sink)
640 (ystream-write-rod r (sink-ystream sink)))
643 ;;;; convenience functions for DOMless XML serialization
645 (defvar *current-element*)
646 (defvar *sink*)
647 (defvar *unparse-namespace-bindings*)
648 (defvar *current-namespace-bindings*)
650 (defmacro with-xml-output (sink &body body)
651 `(invoke-with-xml-output (lambda () ,@body) ,sink))
653 (defmacro with-output-sink ((var) &body body)
654 `(invoke-with-output-sink (lambda (,var) ,@body)))
656 (defun invoke-with-xml-output (fn sink)
657 (let ((*sink* sink)
658 (*current-element* nil)
659 (*unparse-namespace-bindings* *initial-namespace-bindings*)
660 (*current-namespace-bindings* nil))
661 (sax:start-document *sink*)
662 (funcall fn)
663 (sax:end-document *sink*)))
665 (defun invoke-with-output-sink (fn)
666 (maybe-emit-start-tag)
667 (funcall fn *sink*))
669 (defmacro with-element (qname &body body)
670 `(invoke-with-element (lambda () ,@body) ,qname))
672 (defmacro with-element* ((prefix lname) &body body)
673 `(invoke-with-element* (lambda () ,@body) ,prefix ,lname))
675 (defmacro with-namespace ((prefix uri) &body body)
676 `(invoke-with-namespace (lambda () ,@body) ,prefix ,uri))
678 (defun doctype (name public-id system-id &optional internal-subset)
679 (sax:start-dtd *sink* name public-id system-id)
680 (when internal-subset
681 (sax:unparsed-internal-subset *sink* internal-subset))
682 (sax:end-dtd *sink*))
684 (defun maybe-emit-start-tag ()
685 (when *current-element*
686 ;; starting child node, need to emit opening tag of parent first:
687 (destructuring-bind ((uri lname qname) &rest attributes) *current-element*
688 (sax:start-element *sink* uri lname qname (reverse attributes)))
689 (setf *current-element* nil)))
691 (defun invoke-with-namespace (fn prefix uri)
692 (let ((*unparse-namespace-bindings*
693 (acons prefix uri *unparse-namespace-bindings*))
694 (*current-namespace-bindings*
695 (acons prefix uri *current-namespace-bindings*)))
696 (sax:start-prefix-mapping *sink* prefix uri)
697 (multiple-value-prog1
698 (funcall fn)
699 (sax:end-prefix-mapping *sink* prefix))))
701 (defun invoke-with-element (fn qname)
702 (setf qname (rod qname))
703 (multiple-value-bind (prefix lname)
704 (split-qname qname)
705 (invoke-with-element* fn prefix lname qname)))
707 (defun find-unparse-namespace (prefix)
708 (cdr (assoc prefix *unparse-namespace-bindings* :test 'equal)))
710 (defun invoke-with-element* (fn prefix lname &optional qname)
711 (setf prefix (when prefix (rod prefix)))
712 (setf lname (rod lname))
713 (maybe-emit-start-tag)
714 (let* ((qname (or qname
715 (if prefix (concatenate 'rod prefix #":" lname) lname)))
716 (uri (find-unparse-namespace (or prefix #"")))
717 (*current-element*
718 (cons (list uri lname qname)
719 (mapcar (lambda (x)
720 (destructuring-bind (prefix &rest uri) x
721 (sax:make-attribute
722 :namespace-uri #"http://www.w3.org/2000/xmlns/"
723 :local-name prefix
724 :qname (if (zerop (length prefix))
725 #"xmlns"
726 (concatenate 'rod #"xmlns:" prefix))
727 :value uri)))
728 *current-namespace-bindings*))))
729 (multiple-value-prog1
730 (let ((*current-namespace-bindings* nil))
731 (funcall fn))
732 (maybe-emit-start-tag)
733 (sax:end-element *sink* uri lname qname))))
735 (defgeneric unparse-attribute (value))
736 (defmethod unparse-attribute ((value string)) value)
737 (defmethod unparse-attribute ((value null)) nil)
738 (defmethod unparse-attribute ((value integer)) (write-to-string value))
740 (defun attribute (qname value)
741 (setf qname (rod qname))
742 (multiple-value-bind (prefix lname)
743 (split-qname qname)
744 (attribute* prefix lname value qname)))
746 (defun attribute* (prefix lname value &optional qname)
747 (setf value (unparse-attribute value))
748 (when value
749 (setf prefix (when prefix (rod prefix)))
750 (setf lname (rod lname))
751 (push (sax:make-attribute
752 :namespace-uri (find-unparse-namespace prefix)
753 :local-name lname
754 :qname (or qname
755 (if prefix (concatenate 'rod prefix #":" lname) lname))
756 :value (rod value))
757 (cdr *current-element*))))
759 (defun cdata (data)
760 (maybe-emit-start-tag)
761 (sax:start-cdata *sink*)
762 (sax:characters *sink* (rod data))
763 (sax:end-cdata *sink*)
764 data)
766 (defun text (data)
767 (maybe-emit-start-tag)
768 (sax:characters *sink* (rod data))
769 data)
771 (defun comment (data)
772 (maybe-emit-start-tag)
773 (sax:comment *sink* (rod data))
774 data)
776 (defun processing-instruction (target data)
777 (maybe-emit-start-tag)
778 (sax:processing-instruction *sink* (rod target) (rod data))
779 data)
781 (defun unescaped (str)
782 (maybe-emit-start-tag)
783 (sax:unescaped *sink* (rod str)))