1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: XML; readtable: glisp; Encoding: utf-8; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Dump canonic XML according to J.Clark
4 ;;; Created: 1999-09-09
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; License: LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; © copyright 1999 by Gilbert Baumann
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
31 ;; | This document defines a subset of XML called canonical XML. The
32 ;; | intended use of canonical XML is in testing XML processors, as a
33 ;; | representation of the result of parsing an XML document.
35 ;; | Every well-formed XML document has a unique structurally equivalent
36 ;; | canonical XML document. Two structurally equivalent XML documents have
37 ;; | a byte-for-byte identical canonical XML document. Canonicalizing an
38 ;; | XML document requires only information that an XML processor is
39 ;; | required to make available to an application.
41 ;; | A canonical XML document conforms to the following grammar:
43 ;; | CanonXML ::= Pi* element Pi*
44 ;; | element ::= Stag (Datachar | Pi | element)* Etag
45 ;; | Stag ::= '<' Name Atts '>'
46 ;; | Etag ::= '</' Name '>'
47 ;; | Pi ::= '<?' Name ' ' (((Char - S) Char*)? - (Char* '?>' Char*)) '?>'
48 ;; | Atts ::= (' ' Name '=' '"' Datachar* '"')*
49 ;; | Datachar ::= '&' | '<' | '>' | '"'
50 ;; | | '	'| ' '| ' '
51 ;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD))
52 ;; | Name ::= (see XML spec)
53 ;; | Char ::= (see XML spec)
54 ;; | S ::= (see XML spec)
56 ;; | Attributes are in lexicographical order (in Unicode bit order).
58 ;; | A canonical XML document is encoded in UTF-8.
60 ;; | Ignorable white space is considered significant and is treated
61 ;; | equivalently to data.
63 ;; -- James Clark (jjc@jclark.com)
65 (defvar *quux
*) ;!!!BIG HACK!!!
67 (defun unparse-document (doc sink
)
68 (mapc (rcurry #'unparse-node sink
) (dom:child-nodes doc
)))
70 (defun unparse-node (node sink
)
71 (cond ((dom:element-p node
)
73 (write-rod (dom:tag-name node
) sink
)
75 (let ((atts (sort (copy-list (dom:items
(dom:attributes node
)))
76 #'rod
< :key
#'dom
:name
)))
78 (write-rune #/space sink
)
79 (write-rod (dom:name a
) sink
)
81 (write-rune #/\" sink
)
83 (map nil
(lambda (c) (unparse-datachar c sink
)) (dom:value a
)))
84 (write-rune #/\" sink
)))
85 (write-rod '#.
(string-rod ">") sink
)
86 (dolist (k (dom:child-nodes node
))
87 (unparse-node k sink
))
88 (write-rod '#.
(string-rod "</") sink
)
89 (write-rod (dom:tag-name node
) sink
)
90 (write-rod '#.
(string-rod ">") sink
))
91 ((dom:processing-instruction-p node
)
92 (unless (rod-equal (dom:target node
) '#.
(string-rod "xml"))
93 (write-rod '#.
(string-rod "<?") sink
)
94 (write-rod (dom:target node
) sink
)
95 (write-rune #/space sink
)
96 (write-rod (dom:data node
) sink
)
97 (write-rod '#.
(string-rod "?>") sink
) ))
98 ((dom:text-node-p node
)
100 (map nil
(lambda (c) (unparse-datachar c sink
))
103 (error "Oops in unparse: ~S." node
))))
105 (defun unparse-datachar (c sink
)
106 (cond ((rune= c
#/&) (write-rod '#.
(string-rod "&") sink
))
107 ((rune= c
#/<) (write-rod '#.
(string-rod "<") sink
))
108 ((rune= c
#/>) (write-rod '#.
(string-rod ">") sink
))
109 ((rune= c
#/\") (write-rod '#.
(string-rod """) sink
))
110 ((rune= c
#/U
+0009) (write-rod '#.
(string-rod "	") sink
))
111 ((rune= c
#/U
+000A
) (write-rod '#.
(string-rod " ") sink
))
112 ((rune= c
#/U
+000D
) (write-rod '#.
(string-rod " ") sink
))
114 (write-rune c sink
))))
116 (defun write-rod (rod sink
)
118 (map nil
(lambda (c) (write-rune c sink
)) rod
)))
120 (defun write-rune (rune sink
)
121 (cond ((<= #xD800 rune
#xDBFF
)
123 ((<= #xDC00 rune
#xDFFF
)
124 (let ((q (logior (ash (- *quux
* #xD7C0
) 10) (- rune
#xDC00
))))
125 (write-rune-0 q sink
))
128 (write-rune-0 rune sink
))))
130 (defun write-rune-0 (rune sink
)
132 (write-char (code-char x
) sink
)))
133 (cond ((<= #x00000000 rune
#x0000007F
)
135 ((<= #x00000080 rune
#x000007FF
)
136 (wr (logior #b11000000
(ldb (byte 5 6) rune
)))
137 (wr (logior #b10000000
(ldb (byte 6 0) rune
))))
138 ((<= #x00000800 rune
#x0000FFFF
)
139 (wr (logior #b11100000
(ldb (byte 4 12) rune
)))
140 (wr (logior #b10000000
(ldb (byte 6 6) rune
)))
141 (wr (logior #b10000000
(ldb (byte 6 0) rune
))))
142 ((<= #x00010000 rune
#x001FFFFF
)
143 (wr (logior #b11110000
(ldb (byte 3 18) rune
)))
144 (wr (logior #b10000000
(ldb (byte 6 12) rune
)))
145 (wr (logior #b10000000
(ldb (byte 6 6) rune
)))
146 (wr (logior #b10000000
(ldb (byte 6 0) rune
))))
147 ((<= #x00200000 rune
#x03FFFFFF
)
148 (wr (logior #b11111000
(ldb (byte 2 24) rune
)))
149 (wr (logior #b10000000
(ldb (byte 6 18) rune
)))
150 (wr (logior #b10000000
(ldb (byte 6 12) rune
)))
151 (wr (logior #b10000000
(ldb (byte 6 6) rune
)))
152 (wr (logior #b10000000
(ldb (byte 6 0) rune
))))
153 ((<= #x04000000 rune
#x7FFFFFFF
)
154 (wr (logior #b11111100
(ldb (byte 1 30) rune
)))
155 (wr (logior #b10000000
(ldb (byte 6 24) rune
)))
156 (wr (logior #b10000000
(ldb (byte 6 18) rune
)))
157 (wr (logior #b10000000
(ldb (byte 6 12) rune
)))
158 (wr (logior #b10000000
(ldb (byte 6 6) rune
)))
159 (wr (logior #b10000000
(ldb (byte 6 0) rune
)))))))
161 (defun rod< (rod1 rod2
)
164 (cond ((= i
(length rod1
))
168 ((< (aref rod1 i
) (aref rod2 i
))
170 ((> (aref rod1 i
) (aref rod2 i
))