1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; -*-
2 ;;; ---------------------------------------------------------------------------
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; Author: Henrik Motakef <hmot@henrik-motakef.de>
7 ;;; Author: David Lichteblau <david@lichteblau.com>
8 ;;; License: Lisp-LGPL (See file COPYING for details).
9 ;;; ---------------------------------------------------------------------------
10 ;;; (c) copyright 1999 by Gilbert Baumann
11 ;;; (c) copyright 2003 by Henrik Motakef
12 ;;; (c) copyright 2004 knowledgeTools Int. GmbH
13 ;;; (c) copyright 2004 David Lichteblau
14 ;;; (c) copyright 2005 David Lichteblau
16 ;;; This library is free software; you can redistribute it and/or
17 ;;; modify it under the terms of the GNU Library General Public
18 ;;; License as published by the Free Software Foundation; either
19 ;;; version 2 of the License, or (at your option) any later version.
21 ;;; This library is distributed in the hope that it will be useful,
22 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 ;;; Library General Public License for more details.
26 ;;; You should have received a copy of the GNU Library General Public
27 ;;; License along with this library; if not, write to the
28 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
29 ;;; Boston, MA 02111-1307 USA.
35 ;; For reading runes, I defined my own streams, called xstreams,
36 ;; because we want to be fast. A function call or even a method call
37 ;; per character is not acceptable, instead of that we define a
38 ;; buffered stream with and advertised buffer layout, so that we
39 ;; could use the trick stdio uses: READ-RUNE and PEEK-RUNE are macros,
40 ;; directly accessing the buffer and only calling some underflow
41 ;; handler in case of stream underflows. This will yield to quite a
42 ;; performance boost vs calling READ-BYTE per character.
44 ;; Also we need to do encoding t conversion on ; this better done at large chunks of data rather than on a character
45 ;; by character basis. This way we need a dispatch on the active
46 ;; encoding only once in a while, instead of for each character. This
47 ;; allows us to use a CLOS interface to do the underflow handling.
51 ;; Now, for reading tokens, we define another kind of streams, called
52 ;; zstreams. These zstreams also maintain an input stack to implement
53 ;; inclusion of external entities. This input stack contains xstreams
54 ;; or the special marker :STOP. Such a :STOP marker indicates, that
55 ;; input should not continue there, but well stop; that is simulate an
56 ;; EOF. The user is then responsible to pop this marker off the input
59 ;; This input stack is also used to detect circular entity inclusion.
61 ;; The zstream tokenizer recognizes the following types of tokens and
62 ;; is controlled by the *DATA-BEHAVIOUR* flag. (Which should become a
63 ;; slot of zstreams instead).
66 ;; :xml-decl (<target> . <content>) ;processing-instruction starting with "<?xml"
67 ;; :pi (<target> . <content>) ;processing-instruction
68 ;; :stag (<name> . <atts>) ;start tag
69 ;; :etag (<name> . <atts>) ;end tag
70 ;; :ztag (<name> . <atts>) ;empty tag
79 ;; *data-behaviour* = :DTD
81 ;; :nmtoken <interned-rod>
87 ;; :\[ :\] :\( :\) :|\ :\> :\" :\' :\, :\? :\* :\+
89 ;; *data-behaviour* = :DOC
91 ;; :entity-ref <interned-rod>
97 ;; o provide for a faster DOM
99 ;; o morph zstream into a context object and thus also get rid of
100 ;; special variables. Put the current DTD there too.
103 ;; o the *scratch-pad* hack should become something much more
104 ;; reentrant, we could either define a system-wide resource
105 ;; or allocate some scratch-pads per context.
106 ;; [for thread-safety reasons the array are allocated per context now,
107 ;; reentrancy is still open]
109 ;; o CR handling in utf-16 deocders
113 ;; o max depth together with circle detection
114 ;; (or proof, that our circle detection is enough).
115 ;; [gemeint ist zstream-push--david]
117 ;; o better extensibility wrt character representation, one may want to
119 ;; - UCS-4 in vectoren
121 ;; o xstreams auslagern, documententieren und dann auch in SGML und
122 ;; CSS parser verwenden. (halt alles was zeichen liest).
123 ;; [ausgelagert sind sie; dokumentiert "so la la"; die Reintegration
124 ;; in Closure ist ein ganz anderes Thema]
126 ;; o recording of source locations for nodes.
128 ;; o based on the DTD and xml:space attribute implement HTML white
131 ;; o on a parser option, do not expand external entities.
133 ;;;; Validity constraints:
134 ;;;; (00) Root Element Type like (03), c.f. MAKE-ROOT-MODEL
135 ;;;; (01) Proper Declaration/PE Nesting P/MARKUP-DECL
136 ;;;; (02) Standalone Document Declaration all over the place [*]
137 ;;;; (03) Element Valid VALIDATE-*-ELEMENT, -CHARACTERS
138 ;;;; (04) Attribute Value Type VALIDATE-ATTRIBUTE
139 ;;;; (05) Unique Element Type Declaration DEFINE-ELEMENT
140 ;;;; (06) Proper Group/PE Nesting P/CSPEC
141 ;;;; (07) No Duplicate Types LEGAL-CONTENT-MODEL-P
142 ;;;; (08) ID VALIDATE-ATTRIBUTE
143 ;;;; (09) One ID per Element Type DEFINE-ATTRIBUTE
144 ;;;; (10) ID Attribute Default DEFINE-ATTRIBUTE
145 ;;;; (11) IDREF VALIDATE-ATTRIBUTE, P/DOCUMENT
146 ;;;; (12) Entity Name VALIDATE-ATTRIBUTE
147 ;;;; (13) Name Token VALIDATE-ATTRIBUTE
148 ;;;; (14) Notation Attributes VALIDATE-ATTRIBUTE, P/ATT-TYPE
149 ;;;; (15) One Notation Per Element Type DEFINE-ATTRIBUTE
150 ;;;; (16) No Notation on Empty Element DEFINE-ELEMENT, -ATTRIBUTE
151 ;;;; (17) Enumeration VALIDATE-ATTRIBUTE
152 ;;;; (18) Required Attribute PROCESS-ATTRIBUTES
153 ;;;; (19) Attribute Default Legal DEFINE-ATTRIBUTE
154 ;;;; (20) Fixed Attribute Default VALIDATE-ATTRIBUTE
155 ;;;; (21) Proper Conditional Section/PE Nesting P/CONDITIONAL-SECT, ...
156 ;;;; (22) Entity Declared [**]
157 ;;;; (23) Notation Declared P/ENTITY-DEF, P/DOCUMENT
158 ;;;; (24) Unique Notation Name DEFINE-NOTATION
160 ;;;; [*] Perhaps we could revert the explicit checks of (02), if we did
161 ;;;; _not_ read external subsets of standalone documents when parsing in
162 ;;;; validating mode. Violations of VC (02) constraints would then appear as
163 ;;;; wellformedness violations, right?
165 ;;;; [**] Although I haven't investigated this properly yet, I believe that
166 ;;;; we check this VC together with the WFC even in non-validating mode.
171 (setf (excl:named-readtable
:runes
) *readtable
*)
173 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
174 (defparameter *fast
* '(optimize (speed 3) (safety 0)))
175 ;;(defparameter *fast* '(optimize (speed 2) (safety 3)))
182 (defstruct (context (:conc-name nil
))
186 ;; xml:base machen wir fuer klacks mal gleich als expliziten stack:
188 (referenced-notations '())
189 (id-table (%make-rod-hash-table
))
190 ;; FIXME: Wofuer ist name-hashtable da? Will man das wissen?
191 (name-hashtable (make-rod-hashtable :size
2000))
193 (entity-resolver nil
)
194 (disallow-internal-subset nil
)
197 (defvar *expand-pe-p
* nil
)
199 (defparameter *initial-namespace-bindings
*
201 (#"xmlns" .
#"http://www.w3.org/2000/xmlns/")
202 (#"xml" .
#"http://www.w3.org/XML/1998/namespace")))
204 (defparameter *namespace-bindings
* *initial-namespace-bindings
*)
206 ;;;; ---------------------------------------------------------------------------
211 (defstruct (stream-name
212 (:print-function print-stream-name
))
217 (defun print-stream-name (object stream depth
)
218 (declare (ignore depth
))
219 (format stream
"[~A ~S ~A]"
220 (rod-string (stream-name-entity-name object
))
221 (stream-name-entity-kind object
)
222 (stream-name-uri object
)))
224 (deftype read-element
() 'rune
)
226 (defun call-with-open-xstream (fn stream
)
229 (close-xstream stream
)))
231 (defmacro with-open-xstream
((var value
) &body body
)
232 `(call-with-open-xstream (lambda (,var
) ,@body
) ,value
))
234 (defun call-with-open-xfile (continuation &rest open-args
)
235 (let ((input (apply #'open
(car open-args
) :element-type
'(unsigned-byte 8) (cdr open-args
))))
238 (funcall continuation
(make-xstream input
)))
241 (defmacro with-open-xfile
((stream &rest open-args
) &body body
)
242 `(call-with-open-xfile (lambda (,stream
) .
,body
) .
,open-args
))
244 ;;;; -------------------------------------------------------------------
245 ;;;; Rechnen mit Runen
248 ;; Let us first define fast fixnum arithmetric get rid of type
249 ;; checks. (After all we know what we do here).
251 (defmacro fx-op
(op &rest xs
)
252 `(the fixnum
(,op
,@(mapcar (lambda (x) `(the fixnum
,x
)) xs
))))
253 (defmacro fx-pred
(op &rest xs
)
254 `(,op
,@(mapcar (lambda (x) `(the fixnum
,x
)) xs
)))
256 (defmacro %
+ (&rest xs
) `(fx-op + ,@xs
))
257 (defmacro %-
(&rest xs
) `(fx-op -
,@xs
))
258 (defmacro %
* (&rest xs
) `(fx-op * ,@xs
))
259 (defmacro %
/ (&rest xs
) `(fx-op floor
,@xs
))
260 (defmacro %and
(&rest xs
) `(fx-op logand
,@xs
))
261 (defmacro %ior
(&rest xs
) `(fx-op logior
,@xs
))
262 (defmacro %xor
(&rest xs
) `(fx-op logxor
,@xs
))
263 (defmacro %ash
(&rest xs
) `(fx-op ash
,@xs
))
264 (defmacro %mod
(&rest xs
) `(fx-op mod
,@xs
))
266 (defmacro %
= (&rest xs
) `(fx-pred = ,@xs
))
267 (defmacro %
<= (&rest xs
) `(fx-pred <= ,@xs
))
268 (defmacro %
>= (&rest xs
) `(fx-pred >= ,@xs
))
269 (defmacro %
< (&rest xs
) `(fx-pred < ,@xs
))
270 (defmacro %
> (&rest xs
) `(fx-pred > ,@xs
))
272 ;;; XXX Geschwindigkeit dieser Definitionen untersuchen!
274 (defmacro rune-op
(op &rest xs
)
275 `(code-rune (,op
,@(mapcar (lambda (x) `(rune-code ,x
)) xs
))))
276 (defmacro rune-pred
(op &rest xs
)
277 `(,op
,@(mapcar (lambda (x) `(rune-code ,x
)) xs
)))
279 (defmacro %rune
+ (&rest xs
) `(rune-op + ,@xs
))
280 (defmacro %rune-
(&rest xs
) `(rune-op -
,@xs
))
281 (defmacro %rune
* (&rest xs
) `(rune-op * ,@xs
))
282 (defmacro %rune
/ (&rest xs
) `(rune-op floor
,@xs
))
283 (defmacro %rune-and
(&rest xs
) `(rune-op logand
,@xs
))
284 (defmacro %rune-ior
(&rest xs
) `(rune-op logior
,@xs
))
285 (defmacro %rune-xor
(&rest xs
) `(rune-op logxor
,@xs
))
286 (defmacro %rune-ash
(a b
) `(code-rune (ash (rune-code ,a
) ,b
)))
287 (defmacro %rune-mod
(&rest xs
) `(rune-op mod
,@xs
))
289 (defmacro %rune
= (&rest xs
) `(rune-pred = ,@xs
))
290 (defmacro %rune
<= (&rest xs
) `(rune-pred <= ,@xs
))
291 (defmacro %rune
>= (&rest xs
) `(rune-pred >= ,@xs
))
292 (defmacro %rune
< (&rest xs
) `(rune-pred < ,@xs
))
293 (defmacro %rune
> (&rest xs
) `(rune-pred > ,@xs
))
295 ;;;; ---------------------------------------------------------------------------
299 ;;; make-rod-hashtable
300 ;;; rod-hash-get hashtable rod &optional start end -> value ; successp
301 ;;; (setf (rod-hash-get hashtable rod &optional start end) new-value
304 (defstruct (rod-hashtable (:constructor make-rod-hashtable
/low
))
309 (defun make-rod-hashtable (&key
(size 200))
310 (setf size
(nearest-greater-prime size
))
311 (make-rod-hashtable/low
313 :table
(make-array size
:initial-element nil
)))
315 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
316 (defconstant +fixnum-bits
+
317 (1- (integer-length most-positive-fixnum
))
318 "Pessimistic approximation of the number of bits of fixnums.")
320 (defconstant +fixnum-mask
+
321 (1- (expt 2 +fixnum-bits
+))
322 "Pessimistic approximation of the largest bit-mask, still being a fixnum."))
324 (definline stir
(a b
)
326 (%xor
(%ior
(%ash
(%and a
#.
(ash +fixnum-mask
+ -
5)) 5)
327 (%ash a
#.
(- 5 +fixnum-bits
+)))
330 (definline rod-hash
(rod start end
)
331 "Compute a hash code out of a rod."
332 (let ((res (%- end start
)))
333 (do ((i start
(%
+ i
1)))
335 (declare (type fixnum i
))
336 (setf res
(stir res
(rune-code (%rune rod i
)))))
339 (definline rod
=* (x y
&key
(start1 0) (end1 (length x
))
340 (start2 0) (end2 (length y
)))
341 (and (%
= (%- end1 start1
) (%- end2 start2
))
342 (do ((i start1
(%
+ i
1))
346 (unless (rune= (%rune x i
) (%rune y j
))
349 (definline rod
=** (x y start1 end1 start2 end2
)
350 (and (%
= (%- end1 start1
) (%- end2 start2
))
351 (do ((i start1
(%
+ i
1))
355 (unless (rune= (%rune x i
) (%rune y j
))
358 (defun rod-hash-get (hashtable rod
&optional
(start 0) (end (length rod
)))
359 (declare (type (simple-array rune
(*)) rod
))
360 (let ((j (%mod
(rod-hash rod start end
)
361 (rod-hashtable-size hashtable
))))
362 (dolist (q (svref (rod-hashtable-table hashtable
) j
)
363 (values nil nil nil
))
364 (declare (type cons q
))
365 (when (rod=** (car q
) rod
0 (length (the (simple-array rune
(*)) (car q
))) start end
)
366 (return (values (cdr q
) t
(car q
)))))))
368 (defun rod-hash-set (new-value hashtable rod
&optional
(start 0) (end (length rod
)))
369 (let ((j (%mod
(rod-hash rod start end
)
370 (rod-hashtable-size hashtable
)))
372 (dolist (q (svref (rod-hashtable-table hashtable
) j
)
374 (setf key
(rod-subseq* rod start end
))
375 (push (cons key new-value
)
376 (aref (rod-hashtable-table hashtable
) j
))))
377 (when (rod=* (car q
) rod
:start2 start
:end2 end
)
379 (setf (cdr q
) new-value
)
381 (values new-value key
)))
384 (defun rod-subseq* (source start
&optional
(end (length source
)))
385 (unless (and (typep start
'fixnum
) (>= start
0))
386 (error "~S is not a non-negative fixnum." start
))
387 (unless (and (typep end
'fixnum
) (>= end start
))
388 (error "END argument, ~S, is not a fixnum no less than START, ~S." end start
))
389 (when (> start
(length source
))
390 (error "START argument, ~S, should be no greater than length of rod." start
))
391 (when (> end
(length source
))
392 (error "END argument, ~S, should be no greater than length of rod." end
))
394 (declare (type fixnum start end
))
395 (let ((res (make-rod (- end start
))))
396 (declare (type rod res
))
397 (do ((i (- (- end start
) 1) (the fixnum
(- i
1))))
399 (declare (type fixnum i
))
400 (setf (%rune res i
) (aref source
(the fixnum
(+ i start
))))))))
403 (defun rod-subseq* (source start
&optional
(end (length source
)))
404 (subseq source start end
))
406 (deftype ufixnum
() `(unsigned-byte ,(integer-length most-positive-fixnum
)))
409 (defun rod-subseq** (source start
&optional
(end (length source
)))
410 (declare (type (simple-array rune
(*)) source
)
413 (optimize (speed 3) (safety 0)))
414 (let ((res (make-array (%- end start
) :element-type
'rune
)))
415 (declare (type (simple-array rune
(*)) res
))
416 (let ((i (%- end start
)))
417 (declare (type ufixnum i
))
422 (setf (%rune res i
) (%rune source
(the ufixnum
(+ i start
))))))
426 (defun rod-subseq** (source start
&optional
(end (length source
)))
427 (subseq source start end
))
429 (defun (setf rod-hash-get
) (new-value hashtable rod
&optional
(start 0) (end (length rod
)))
430 (rod-hash-set new-value hashtable rod start end
))
432 (defun intern-name (rod &optional
(start 0) (end (length rod
)))
433 (multiple-value-bind (value successp key
) (rod-hash-get (name-hashtable *ctx
*) rod start end
)
434 (declare (ignore value
))
437 (nth-value 1 (rod-hash-set t
(name-hashtable *ctx
*) rod start end
)))))
439 ;;;; ---------------------------------------------------------------------------
444 (defvar *scratch-pad
*)
445 (defvar *scratch-pad-2
*)
446 (defvar *scratch-pad-3
*)
447 (defvar *scratch-pad-4
*)
449 (declaim (type (simple-array rune
(*))
450 *scratch-pad
* *scratch-pad-2
* *scratch-pad-3
* *scratch-pad-4
*))
452 (defmacro with-scratch-pads
((&optional
) &body body
)
453 `(let ((*scratch-pad
* (make-array 1024 :element-type
'rune
))
454 (*scratch-pad-2
* (make-array 1024 :element-type
'rune
))
455 (*scratch-pad-3
* (make-array 1024 :element-type
'rune
))
456 (*scratch-pad-4
* (make-array 1024 :element-type
'rune
)))
459 (defmacro %put-unicode-char
(code-var put
)
461 (cond #+rune-is-utf-16
462 ((%
> ,code-var
#xFFFF
)
463 (,put
(the rune
(code-rune (%
+ #xD7C0
(%ash
,code-var -
10)))))
464 (,put
(the rune
(code-rune (%ior
#xDC00
(%and
,code-var
#x03FF
))))))
466 (,put
(code-rune ,code-var
))))))
468 (defun adjust-array-by-copying (old-array new-size
)
469 "Adjust an array by copying and thus ensures, that result is a SIMPLE-ARRAY."
470 (let ((res (make-array new-size
:element-type
(array-element-type old-array
))))
471 (replace res old-array
472 :start1
0 :end1
(length old-array
)
473 :start2
0 :end2
(length old-array
))
476 (defmacro with-rune-collector-aux
(scratch collect body mode
)
481 `(let ((,n
(length ,scratch
))
484 (declare (type fixnum
,n
,i
))
490 (when (%
>= ,',i
,',n
)
491 (setf ,',n
(* 2 ,',n
))
494 (adjust-array-by-copying ,',scratch
,',n
))))
495 (setf (aref (the (simple-array rune
(*)) ,',b
) ,',i
) x
)
501 `(intern-name ,b
0 ,i
))
503 `(let ((,rod
(make-rod ,i
)))
504 (while (not (%
= ,i
0))
506 (setf (%rune
,rod
,i
)
507 (aref (the (simple-array rune
(*)) ,b
) ,i
)))
513 '(defmacro with-rune-collector-aux
(scratch collect body mode
)
518 `(let ((,n
(length ,scratch
))
520 (declare (type fixnum
,n
,i
))
526 (when (%
>= ,',i
,',n
)
527 (setf ,',n
(* 2 ,',n
))
530 (adjust-array-by-copying ,',scratch
,',n
))))
531 (setf (aref (the (simple-array rune
(*)) ,',scratch
) ,',i
) x
)
537 `(intern-name ,scratch
0 ,i
))
539 `(let ((,rod
(make-rod ,i
)))
542 (setf (%rune
,rod
,i
)
543 (aref (the (simple-array rune
(*)) ,scratch
) ,i
)))
546 `(values ,scratch
0 ,i
))
549 (defmacro with-rune-collector
((collect) &body body
)
550 `(with-rune-collector-aux *scratch-pad
* ,collect
,body
:copy
))
552 (defmacro with-rune-collector-2
((collect) &body body
)
553 `(with-rune-collector-aux *scratch-pad-2
* ,collect
,body
:copy
))
555 (defmacro with-rune-collector-3
((collect) &body body
)
556 `(with-rune-collector-aux *scratch-pad-3
* ,collect
,body
:copy
))
558 (defmacro with-rune-collector-4
((collect) &body body
)
559 `(with-rune-collector-aux *scratch-pad-4
* ,collect
,body
:copy
))
561 (defmacro with-rune-collector
/intern
((collect) &body body
)
562 `(with-rune-collector-aux *scratch-pad
* ,collect
,body
:intern
))
564 (defmacro with-rune-collector
/raw
((collect) &body body
)
565 `(with-rune-collector-aux *scratch-pad
* ,collect
,body
:raw
))
568 (defmacro while-reading-runes
((reader stream-in
) &rest body
)
569 ;; Thou shalt not leave body via a non local exit
570 (let ((stream (make-symbol "STREAM"))
571 (rptr (make-symbol "RPTR"))
572 (fptr (make-symbol "FPTR"))
573 (buf (make-symbol "BUF")) )
574 `(let* ((,stream
,stream-in
)
575 (,rptr
(xstream-read-ptr ,stream
))
576 (,fptr
(xstream-fill-ptr ,stream
))
577 (,buf
(xstream-buffer ,stream
)))
578 (declare (type fixnum
,rptr
,fptr
)
579 (type xstream
,stream
))
580 (macrolet ((,reader
(res-var)
581 `(cond ((%
= ,',rptr
,',fptr
)
582 (setf (xstream-read-ptr ,',stream
) ,',rptr
)
583 (setf ,res-var
(xstream-underflow ,',stream
))
584 (setf ,',rptr
(xstream-read-ptr ,',stream
))
585 (setf ,',fptr
(xstream-fill-ptr ,',stream
))
586 (setf ,',buf
(xstream-buffer ,',stream
)))
589 (aref (the (simple-array read-element
(*)) ,',buf
)
590 (the fixnum
,',rptr
)))
591 (setf ,',rptr
(%
+ ,',rptr
1))))))
594 (setf (xstream-read-ptr ,stream
) ,rptr
) )))))
597 ;;;; ---------------------------------------------------------------------------
601 (define-condition xml-parse-error
(simple-error) ())
602 (define-condition well-formedness-violation
(xml-parse-error) ())
603 (define-condition validity-error
(xml-parse-error) ())
605 ;; We make some effort to signal end of file as a special condition, but we
606 ;; don't actually try very hard. Not sure whether we should. Right now I
607 ;; would prefer not to document this class.
608 (define-condition end-of-xstream
(well-formedness-violation) ())
610 (defun describe-xstream (x s
)
611 (format s
" Line ~D, column ~D in ~A~%"
612 (xstream-line-number x
)
613 (xstream-column-number x
)
614 (let ((name (xstream-name x
)))
617 "<anonymous stream>")
618 ((eq :main
(stream-name-entity-kind name
))
619 (stream-name-uri name
))
623 (defun %error
(class stream message
)
624 (let* ((zmain (if *ctx
* (main-zstream *ctx
*) nil
))
625 (zstream (if (zstream-p stream
) stream zmain
))
626 (xstream (if (xstream-p stream
) stream nil
))
627 (s (make-string-output-stream)))
628 (write-line message s
)
630 (write-line "Location:" s
)
631 (describe-xstream xstream s
))
634 (remove xstream
(remove :stop
(zstream-input-stack zstream
)))))
636 (write-line "Context:" s
)
638 (describe-xstream x s
)))))
639 (when (and zmain
(not (eq zstream zmain
)))
641 (remove xstream
(remove :stop
(zstream-input-stack zmain
)))))
643 (write-line "Context in main document:" s
)
645 (describe-xstream x s
)))))
648 :format-arguments
(list (get-output-stream-string s
)))))
650 (defun validity-error (fmt &rest args
)
651 (%error
'validity-error
653 (format nil
"Document not valid: ~?" fmt args
)))
655 (defun wf-error (stream fmt
&rest args
)
656 (%error
'well-formedness-violation
658 (format nil
"Document not well-formed: ~?" fmt args
)))
660 (defun eox (stream &optional x
&rest args
)
661 (%error
'end-of-xstream
663 (format nil
"End of file~@[: ~?~]" x args
)))
665 (defclass cxml-parser
(sax:sax-parser
) ((ctx :initarg
:ctx
)))
667 (defun parser-xstream (parser)
668 (car (zstream-input-stack (main-zstream (slot-value parser
'ctx
)))))
670 (defun parser-stream-name (parser)
671 (let ((xstream (parser-xstream parser
)))
673 (xstream-name xstream
)
676 (defmethod sax:line-number
((parser cxml-parser
))
677 (let ((x (parser-xstream parser
)))
679 (xstream-line-number x
)
682 (defmethod sax:column-number
((parser cxml-parser
))
683 (let ((x (parser-xstream parser
)))
685 (xstream-column-number x
)
688 (defmethod sax:system-id
((parser cxml-parser
))
689 (let ((name (parser-stream-name parser
)))
691 (stream-name-uri name
)
694 (defmethod sax:xml-base
((parser cxml-parser
))
695 (let ((uri (car (base-stack (slot-value parser
'ctx
)))))
696 (if (or (null uri
) (stringp uri
))
698 (puri:render-uri uri nil
))))
700 (defvar *validate
* t
)
701 (defvar *external-subset-p
* nil
)
703 (defun validate-start-element (ctx name
)
705 (let* ((pair (car (model-stack ctx
)))
706 (newval (funcall (car pair
) name
)))
708 (validity-error "(03) Element Valid: ~A" (rod-string name
)))
709 (setf (car pair
) newval
)
710 (let ((e (find-element name
(dtd ctx
))))
712 (validity-error "(03) Element Valid: no definition for ~A"
714 (maybe-compile-cspec e
)
715 (push (copy-cons (elmdef-compiled-cspec e
)) (model-stack ctx
))))))
718 (cons (car x
) (cdr x
)))
720 (defun validate-end-element (ctx name
)
722 (let ((pair (car (model-stack ctx
))))
723 (unless (eq (funcall (car pair
) nil
) t
)
724 (validity-error "(03) Element Valid: ~A" (rod-string name
)))
725 (pop (model-stack ctx
)))))
727 (defun validate-characters (ctx rod
)
729 (let ((pair (car (model-stack ctx
))))
730 (unless (funcall (cdr pair
) rod
)
731 (validity-error "(03) Element Valid: unexpected PCDATA")))))
733 (defun standalone-check-necessary-p (def)
737 (elmdef (elmdef-external-p def
))
738 (attdef (attdef-external-p def
)))))
740 ;; attribute validation, defaulting, and normalization -- except for for
741 ;; uniqueness checks, which are done after namespaces have been declared
742 (defun process-attributes (ctx name attlist
)
743 (let ((e (find-element name
(dtd ctx
))))
746 (dolist (ad (elmdef-attributes e
)) ;handle default values
747 (unless (get-attribute (attdef-name ad
) attlist
)
748 (case (attdef-default ad
)
752 (validity-error "(18) Required Attribute: ~S not specified"
753 (rod-string (attdef-name ad
)))))
755 (when (standalone-check-necessary-p ad
)
756 (validity-error "(02) Standalone Document Declaration: missing attribute value"))
757 (push (sax:make-attribute
:qname
(attdef-name ad
)
758 :value
(cadr (attdef-default ad
))
761 (dolist (a attlist
) ;normalize non-CDATA values
762 (let* ((qname (sax:attribute-qname a
))
763 (adef (find-attribute e qname
)))
765 (when (and *validate
*
766 sax
:*namespace-processing
*
767 (eq (attdef-type adef
) :ID
)
768 (find #/: (sax:attribute-value a
)))
769 (validity-error "colon in ID attribute"))
770 (unless (eq (attdef-type adef
) :CDATA
)
771 (let ((canon (canon-not-cdata-attval (sax:attribute-value a
))))
772 (when (and (standalone-check-necessary-p adef
)
773 (not (rod= (sax:attribute-value a
) canon
)))
774 (validity-error "(02) Standalone Document Declaration: attribute value not normalized"))
775 (setf (sax:attribute-value a
) canon
))))))
776 (when *validate
* ;maybe validate attribute values
778 (validate-attribute ctx e a
))))
779 ((and *validate
* attlist
)
780 (validity-error "(04) Attribute Value Type: no definition for element ~A"
781 (rod-string name
)))))
784 (defun get-attribute (name attributes
)
785 (member name attributes
:key
#'sax
:attribute-qname
:test
#'rod
=))
787 (defun validate-attribute (ctx e a
)
788 (when (sax:attribute-specified-p a
) ;defaults checked by DEFINE-ATTRIBUTE
789 (let* ((qname (sax:attribute-qname a
))
791 (or (find-attribute e qname
)
792 (validity-error "(04) Attribute Value Type: not declared: ~A"
793 (rod-string qname
)))))
794 (validate-attribute* ctx adef
(sax:attribute-value a
)))))
796 (defun validate-attribute* (ctx adef value
)
797 (let ((type (attdef-type adef
))
798 (default (attdef-default adef
)))
799 (when (and (listp default
)
800 (eq (car default
) :FIXED
)
801 (not (rod= value
(cadr default
))))
802 (validity-error "(20) Fixed Attribute Default: expected ~S but got ~S"
803 (rod-string (cadr default
))
805 (ecase (if (listp type
) (car type
) type
)
807 (unless (valid-name-p value
)
808 (validity-error "(08) ID: not a name: ~S" (rod-string value
)))
809 (when (eq (gethash value
(id-table ctx
)) t
)
810 (validity-error "(08) ID: ~S not unique" (rod-string value
)))
811 (setf (gethash value
(id-table ctx
)) t
))
813 (validate-idref ctx value
))
815 (let ((names (split-names value
)))
817 (validity-error "(11) IDREF: malformed names"))
818 (mapc (curry #'validate-idref ctx
) names
)))
820 (validate-nmtoken value
))
822 (let ((tokens (split-names value
)))
824 (validity-error "(13) Name Token: malformed NMTOKENS"))
825 (mapc #'validate-nmtoken tokens
)))
827 (unless (member value
(cdr type
) :test
#'rod
=)
828 (validity-error "(17) Enumeration: value not declared: ~S"
829 (rod-string value
))))
831 (unless (member value
(cdr type
) :test
#'rod
=)
832 (validity-error "(14) Notation Attributes: ~S" (rod-string value
))))
834 (validate-entity value
))
836 (let ((names (split-names value
)))
838 (validity-error "(13) Name Token: malformed NMTOKENS"))
839 (mapc #'validate-entity names
)))
842 (defun validate-idref (ctx value
)
843 (unless (valid-name-p value
)
844 (validity-error "(11) IDREF: not a name: ~S" (rod-string value
)))
845 (unless (gethash value
(id-table ctx
))
846 (setf (gethash value
(id-table ctx
)) nil
)))
848 (defun validate-nmtoken (value)
849 (unless (valid-nmtoken-p value
)
850 (validity-error "(13) Name Token: not a NMTOKEN: ~S"
851 (rod-string value
))))
853 (defstruct (entdef (:constructor
)))
855 (defstruct (internal-entdef
857 (:constructor make-internal-entdef
(value))
858 (:conc-name
#:entdef-
))
859 (value (error "missing argument") :type rod
)
861 (external-subset-p *external-subset-p
*))
863 (defstruct (external-entdef
865 (:constructor make-external-entdef
(extid ndata
))
866 (:conc-name
#:entdef-
))
867 (extid (error "missing argument") :type extid
)
868 (ndata nil
:type
(or rod null
)))
870 (defun validate-entity (value)
871 (unless (valid-name-p value
)
872 (validity-error "(12) Entity Name: not a name: ~S" (rod-string value
)))
873 (let ((def (let ((*validate
*
874 ;; Similarly the entity refs are internal and
875 ;; don't need normalization ... the unparsed
876 ;; entities (and entities) aren't "references"
877 ;; -- sun/valid/sa03.xml
879 (get-entity-definition value
:general
(dtd *ctx
*)))))
880 (unless (and (typep def
'external-entdef
) (entdef-ndata def
))
882 (validity-error "(12) Entity Name: ~S" (rod-string value
)))))
884 (defun split-names (rod)
885 (flet ((whitespacep (x)
886 (or (rune= x
#/U
+0009)
889 (rune= x
#/U
+0020))))
890 (if (let ((n (length rod
)))
892 (or (whitespacep (rune rod
0))
893 (whitespacep (rune rod
(1- n
))))))
895 (split-sequence-if #'whitespacep rod
:remove-empty-subseqs t
))))
897 (defun zstream-base-sysid (zstream)
899 (dolist (k (zstream-input-stack zstream
))
900 (let ((base-sysid (stream-name-uri (xstream-name k
))))
901 (when base-sysid
(return base-sysid
))))))
904 (defun absolute-uri (sysid source-stream
)
905 (let ((base-sysid (zstream-base-sysid source-stream
)))
906 ;; XXX is the IF correct?
908 (puri:merge-uris sysid base-sysid
)
911 (defstruct (extid (:constructor make-extid
(public system
)))
912 (public nil
:type
(or rod null
))
913 (system (error "missing argument") :type
(or puri
:uri null
)))
915 (defun absolute-extid (source-stream extid
)
916 (let ((sysid (extid-system extid
))
917 (result (copy-extid extid
)))
918 (setf (extid-system result
) (absolute-uri sysid source-stream
))
921 (defun define-entity (source-stream name kind def
)
922 (setf name
(intern-name name
))
923 (when (and sax
:*namespace-processing
* (find #/: name
))
924 (wf-error source-stream
"colon in entity name"))
927 (:general
(dtd-gentities (dtd *ctx
*)))
928 (:parameter
(dtd-pentities (dtd *ctx
*))))))
929 (unless (gethash name table
)
930 (when (and source-stream
(handler *ctx
*))
931 (report-entity (handler *ctx
*) kind name def
))
932 (when (typep def
'external-entdef
)
933 (setf (entdef-extid def
)
934 (absolute-extid source-stream
(entdef-extid def
))))
935 (setf (gethash name table
)
936 (cons *external-subset-p
* def
)))))
938 (defun get-entity-definition (entity-name kind dtd
)
940 (wf-error nil
"entity not defined: ~A" (rod-string entity-name
)))
941 (destructuring-bind (extp &rest def
)
944 (:general
(dtd-gentities dtd
))
945 (:parameter
(dtd-pentities dtd
)))
947 (when (and *validate
* (standalone-p *ctx
*) extp
)
948 (validity-error "(02) Standalone Document Declaration: entity reference: ~S"
949 (rod-string entity-name
)))
952 (defun entity->xstream
(zstream entity-name kind
&optional internalp
)
953 ;; `zstream' is for error messages
954 (let ((def (get-entity-definition entity-name kind
(dtd *ctx
*))))
956 (wf-error zstream
"Entity '~A' is not defined." (rod-string entity-name
)))
960 (when (and (standalone-p *ctx
*)
961 (entdef-external-subset-p def
))
964 "entity declared in external subset, but document is standalone"))
965 (setf r
(make-rod-xstream (entdef-value def
)))
966 (setf (xstream-name r
)
967 (make-stream-name :entity-name entity-name
973 "entity not internal: ~A" (rod-string entity-name
)))
974 (when (entdef-ndata def
)
976 "reference to unparsed entity: ~A"
977 (rod-string entity-name
)))
978 (setf r
(xstream-open-extid (extid-using-catalog (entdef-extid def
))))
979 (setf (stream-name-entity-name (xstream-name r
)) entity-name
980 (stream-name-entity-kind (xstream-name r
)) kind
)))
983 (defun checked-get-entdef (name type
)
984 (let ((def (get-entity-definition name type
(dtd *ctx
*))))
986 (wf-error nil
"Entity '~A' is not defined." (rod-string name
)))
989 (defun xstream-open-extid* (entity-resolver pubid sysid
)
991 (or (funcall (or entity-resolver
(constantly nil
)) pubid sysid
)
992 (open (uri-to-pathname sysid
)
993 :element-type
'(unsigned-byte 8)
994 :direction
:input
))))
996 :name
(make-stream-name :uri sysid
)
999 (defun xstream-open-extid (extid)
1000 (xstream-open-extid* (entity-resolver *ctx
*)
1001 (extid-public extid
)
1002 (extid-system extid
)))
1004 (defun call-with-entity-expansion-as-stream (zstream cont name kind internalp
)
1005 ;; `zstream' is for error messages
1006 (let ((in (entity->xstream zstream name kind internalp
)))
1007 (push (stream-name-uri (xstream-name in
)) (base-stack *ctx
*))
1010 (pop (base-stack *ctx
*))
1011 (close-xstream in
))))
1013 (defun ensure-dtd ()
1015 (setf (dtd *ctx
*) (make-dtd))
1016 (define-default-entities)))
1018 (defun define-default-entities ()
1019 (define-entity nil
#"lt" :general
(make-internal-entdef #"<"))
1020 (define-entity nil
#"gt" :general
(make-internal-entdef #">"))
1021 (define-entity nil
#"amp" :general
(make-internal-entdef #"&"))
1022 (define-entity nil
#"apos" :general
(make-internal-entdef #"'"))
1023 (define-entity nil
#"quot" :general
(make-internal-entdef #"\"")))
1026 ;; an attribute definition
1027 element
;name of element this attribute belongs to
1028 name
;name of attribute
1029 type
;type of attribute; either one of :CDATA, :ID, :IDREF, :IDREFS,
1030 ; :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS, or
1031 ; (:NOTATION <name>*)
1032 ; (:ENUMERATION <name>*)
1033 default
;default value of attribute:
1034 ; :REQUIRED, :IMPLIED, (:FIXED content) or (:DEFAULT content)
1035 (external-p *external-subset-p
*)
1039 ;; an element definition
1040 name
;name of the element
1041 content
;content model [*]
1042 attributes
;list of defined attributes
1043 compiled-cspec
;cons of validation function for contentspec
1044 (external-p *external-subset-p
*)
1047 ;; [*] in XML it is possible to define attributes before the element
1048 ;; itself is defined and since we hang attribute definitions into the
1049 ;; relevant element definitions, the `content' slot indicates whether an
1050 ;; element was actually defined. It is NIL until set to a content model
1051 ;; when the element type declaration is processed.
1053 (defun %make-rod-hash-table
()
1054 ;; XXX with portable hash tables, this is the only way to case-sensitively
1055 ;; use rods. However, EQUALP often has horrible performance! Most Lisps
1056 ;; provide extensions for user-defined equality, we should use them! There
1057 ;; is also a home-made hash table for rods defined below, written by
1058 ;; Gilbert (I think). We could also use that one, but I would prefer the
1059 ;; first method, even if it's unportable.
1060 (make-hash-table :test
1061 #+rune-is-character
'equal
1062 #-rune-is-character
'equalp
))
1065 (elements (%make-rod-hash-table
)) ;elmdefs
1066 (gentities (%make-rod-hash-table
)) ;general entities
1067 (pentities (%make-rod-hash-table
)) ;parameter entities
1068 (notations (%make-rod-hash-table
)))
1070 (defun make-dtd-cache ()
1071 (puri:make-uri-space
))
1073 (defvar *cache-all-dtds
* nil
)
1074 (defvar *dtd-cache
* (make-dtd-cache))
1076 (defun remdtd (uri dtd-cache
)
1077 (setf uri
(puri:intern-uri uri dtd-cache
))
1079 (and (getf (puri:uri-plist uri
) 'dtd
) t
)
1080 (puri:unintern-uri uri dtd-cache
)))
1082 (defun clear-dtd-cache (dtd-cache)
1083 (puri:unintern-uri t dtd-cache
))
1085 (defun getdtd (uri dtd-cache
)
1086 (getf (puri:uri-plist
(puri:intern-uri uri dtd-cache
)) 'dtd
))
1088 (defun (setf getdtd
) (newval uri dtd-cache
)
1089 (setf (getf (puri:uri-plist
(puri:intern-uri uri dtd-cache
)) 'dtd
) newval
)
1095 (defun find-element (name dtd
)
1096 (gethash name
(dtd-elements dtd
)))
1098 (defun define-element (dtd element-name
&optional content-model
)
1099 (let ((e (find-element element-name dtd
)))
1103 (setf (gethash element-name
(dtd-elements dtd
))
1104 (make-elmdef :name element-name
:content content-model
))
1106 (sax:element-declaration
(handler *ctx
*) element-name content-model
))))
1107 ((null content-model
)
1111 (when (elmdef-content e
)
1112 (validity-error "(05) Unique Element Type Declaration"))
1113 (when (eq content-model
:EMPTY
)
1114 (dolist (ad (elmdef-attributes e
))
1115 (let ((type (attdef-type ad
)))
1116 (when (and (listp type
) (eq (car type
) :NOTATION
))
1117 (validity-error "(16) No Notation on Empty Element: ~S"
1118 (rod-string element-name
)))))))
1119 (sax:element-declaration
(handler *ctx
*) element-name content-model
)
1120 (setf (elmdef-content e
) content-model
)
1121 (setf (elmdef-external-p e
) *external-subset-p
*)
1124 (defvar *redefinition-warning
* nil
)
1126 (defun define-attribute (dtd element name type default
)
1127 (let ((adef (make-attdef :element element
1131 (e (or (find-element element dtd
)
1132 (define-element dtd element
))))
1133 (when (and *validate
* (listp default
))
1134 (unless (eq (attdef-type adef
) :CDATA
)
1135 (setf (second default
) (canon-not-cdata-attval (second default
))))
1136 (validate-attribute* *ctx
* adef
(second default
)))
1137 (cond ((find-attribute e name
)
1138 (when *redefinition-warning
*
1139 (warn "Attribute \"~A\" of \"~A\" not redefined."
1141 (rod-string element
))))
1145 (when (find :ID
(elmdef-attributes e
) :key
#'attdef-type
)
1146 (validity-error "(09) One ID per Element Type: element ~A"
1147 (rod-string element
)))
1148 (unless (member default
'(:REQUIRED
:IMPLIED
))
1149 (validity-error "(10) ID Attribute Default: ~A"
1150 (rod-string element
))))
1151 (flet ((notationp (type)
1152 (and (listp type
) (eq (car type
) :NOTATION
))))
1153 (when (notationp type
)
1154 (when (find-if #'notationp
(elmdef-attributes e
)
1156 (validity-error "(15) One Notation Per Element Type: ~S"
1157 (rod-string element
)))
1158 (when (eq (elmdef-content e
) :EMPTY
)
1159 (validity-error "(16) No Notation on Empty Element: ~S"
1160 (rod-string element
))))))
1161 (sax:attribute-declaration
(handler *ctx
*) element name type default
)
1162 (push adef
(elmdef-attributes e
))))))
1164 (defun find-attribute (elmdef name
)
1165 (find name
(elmdef-attributes elmdef
) :key
#'attdef-name
:test
#'rod
=))
1167 (defun define-notation (dtd name id
)
1168 (let ((ns (dtd-notations dtd
)))
1169 (when (gethash name ns
)
1170 (validity-error "(24) Unique Notation Name: ~S" (rod-string name
)))
1171 (setf (gethash name ns
) id
)))
1173 (defun find-notation (name dtd
)
1174 (gethash name
(dtd-notations dtd
)))
1176 ;;;; ---------------------------------------------------------------------------
1177 ;;;; z streams and lexer
1185 (defun call-with-zstream (fn zstream
)
1187 (funcall fn zstream
)
1188 (dolist (input (zstream-input-stack zstream
))
1189 (cond #-x
&y-streams-are-stream
1191 (close-xstream input
))
1192 #+x
&y-streams-are-stream
1196 (defmacro with-zstream
((zstream &rest args
) &body body
)
1197 `(call-with-zstream (lambda (,zstream
) ,@body
)
1198 (make-zstream ,@args
)))
1200 (defun read-token (input)
1201 (cond ((zstream-token-category input
)
1202 (multiple-value-prog1
1203 (values (zstream-token-category input
)
1204 (zstream-token-semantic input
))
1205 (setf (zstream-token-category input
) nil
1206 (zstream-token-semantic input
) nil
)))
1208 (read-token-2 input
))))
1210 (defun peek-token (input)
1211 (cond ((zstream-token-category input
)
1213 (zstream-token-category input
)
1214 (zstream-token-semantic input
)))
1216 (multiple-value-bind (c s
) (read-token input
)
1217 (setf (zstream-token-category input
) c
1218 (zstream-token-semantic input
) s
))
1219 (values (zstream-token-category input
)
1220 (zstream-token-semantic input
)))))
1222 (defun read-token-2 (input)
1223 (cond ((null (zstream-input-stack input
))
1226 (let ((c (peek-rune (car (zstream-input-stack input
)))))
1228 (cond ((eq (cadr (zstream-input-stack input
)) :stop
)
1231 (close-xstream (pop (zstream-input-stack input
)))
1232 (if (null (zstream-input-stack input
))
1234 (values :S nil
) ;fake #x20 after PE expansion
1237 (read-token-3 input
)))))))
1239 (defvar *data-behaviour
*
1240 ) ;either :DTD or :DOC
1242 (defun read-token-3 (zinput)
1243 (let ((input (car (zstream-input-stack zinput
))))
1245 (let ((c (read-rune input
)))
1247 ;; first the common tokens
1249 (read-token-after-|
<| zinput input
))
1252 (ecase *data-behaviour
*
1254 (cond ((rune= #/\
[ c
) :\
[)
1255 ((rune= #/\
] c
) :\
])
1256 ((rune= #/\
( c
) :\
()
1257 ((rune= #/\
) c
) :\
))
1258 ((rune= #/\| c
) :\|
)
1259 ((rune= #/\
> c
) :\
>)
1260 ((rune= #/\" c
) :\")
1261 ((rune= #/\' c
) :\')
1262 ((rune= #/\
, c
) :\
,)
1263 ((rune= #/\? c
) :\?)
1264 ((rune= #/\
* c
) :\
*)
1265 ((rune= #/\
+ c
) :\
+)
1267 (unread-rune c input
)
1268 (values :nmtoken
(read-name-token input
)))
1270 (let ((q (read-name-token input
)))
1271 (cond ((rod= q
'#.
(string-rod "REQUIRED")) :|
#REQUIRED|
)
1272 ((rod= q
'#.
(string-rod "IMPLIED")) :|
#IMPLIED|
)
1273 ((rod= q
'#.
(string-rod "FIXED")) :|
#FIXED|
)
1274 ((rod= q
'#.
(string-rod "PCDATA")) :|
#PCDATA|
)
1276 (wf-error zinput
"Unknown token: ~S." q
)))))
1277 ((or (rune= c
#/U
+0020)
1283 (cond ((name-start-rune-p (peek-rune input
))
1284 ;; an entity reference
1285 (read-pe-reference zinput
))
1289 (wf-error zinput
"Unexpected character ~S." c
))))
1293 (multiple-value-bind (kind data
) (read-entity-like input
)
1294 (cond ((eq kind
:ENTITY-REFERENCE
)
1295 (values :ENTITY-REF data
))
1296 ((eq kind
:CHARACTER-REFERENCE
)
1298 (with-rune-collector (collect)
1299 (%put-unicode-char data collect
)))))))
1301 (unread-rune c input
)
1302 (values :CDATA
(read-cdata input
)))))))))))
1304 (definline check-rune
(input actual expected
)
1305 (unless (eql actual expected
)
1306 (wf-error input
"expected #/~A but found #/~A"
1307 (rune-char expected
)
1308 (rune-char actual
))))
1310 (defun read-pe-reference (zinput)
1311 (let* ((input (car (zstream-input-stack zinput
)))
1312 (nam (read-name-token input
)))
1313 (check-rune input
#/\
; (read-rune input))
1314 (cond (*expand-pe-p
*
1315 ;; no external entities here!
1316 (let ((i2 (entity->xstream zinput nam
:parameter
)))
1317 (zstream-push i2 zinput
))
1318 (values :S nil
) ;space before inserted PE expansion.
1321 (values :PE-REFERENCE nam
)) )))
1323 (defun read-token-after-|
<|
(zinput input
)
1324 (let ((d (read-rune input
)))
1326 (eox input
"EOF after '<'"))
1328 (read-token-after-|
<!| input
))
1330 (multiple-value-bind (target content
) (read-pi input
)
1331 (cond ((rod= target
'#.
(string-rod "xml"))
1332 (values :xml-decl
(cons target content
)))
1333 ((rod-equal target
'#.
(string-rod "XML"))
1335 "You lost -- no XML processing instructions."))
1336 ((and sax
:*namespace-processing
* (position #/: target
))
1338 "Processing instruction target ~S is not a ~
1342 (values :PI
(cons target content
))))))
1343 ((eq *data-behaviour
* :DTD
)
1344 (unread-rune d input
)
1345 (unless (or (rune= #// d
) (name-start-rune-p d
))
1346 (wf-error zinput
"Expected '!' or '?' after '<' in DTD."))
1347 (values :seen-
< nil
))
1349 (let ((c (peek-rune input
)))
1350 (cond ((name-start-rune-p c
)
1351 (read-tag-2 zinput input
:etag
))
1354 "Expecting name start rune after \"</\".")))))
1355 ((name-start-rune-p d
)
1356 (unread-rune d input
)
1357 (read-tag-2 zinput input
:stag
))
1359 (wf-error zinput
"Expected '!' or '?' after '<' in DTD.")))))
1361 (defun read-token-after-|
<!|
(input)
1362 (let ((d (read-rune input
)))
1364 (eox input
"EOF after \"<!\"."))
1365 ((name-start-rune-p d
)
1366 (unread-rune d input
)
1367 (let ((name (read-name-token input
)))
1368 (cond ((rod= name
'#.
(string-rod "ELEMENT")) :|
<!ELEMENT|
)
1369 ((rod= name
'#.
(string-rod "ENTITY")) :|
<!ENTITY|
)
1370 ((rod= name
'#.
(string-rod "ATTLIST")) :|
<!ATTLIST|
)
1371 ((rod= name
'#.
(string-rod "NOTATION")) :|
<!NOTATION|
)
1372 ((rod= name
'#.
(string-rod "DOCTYPE")) :|
<!DOCTYPE|
)
1374 (wf-error input
"`<!~A' unknown." (rod-string name
))))))
1376 (values :|
<![| nil
))
1378 (setf d
(read-rune input
))
1379 (cond ((rune= #/- d
)
1382 (read-comment-content input
)))
1384 (wf-error input
"Bad character ~S after \"<!-\"" d
))))
1386 (wf-error input
"Bad character ~S after \"<!\"" d
)))))
1388 (definline read-S?
(input)
1389 (while (member (peek-rune input
) '(#/U
+0020 #/U
+0009 #/U
+000A
#/U
+000D
)
1391 (consume-rune input
)))
1393 (defun read-attribute-list (zinput input imagine-space-p
)
1394 (cond ((or imagine-space-p
1395 (let ((c (peek-rune input
)))
1396 (and (not (eq c
:eof
))
1399 (cond ((eq (peek-rune input
) :eof
)
1401 ((name-start-rune-p (peek-rune input
))
1402 (cons (read-attribute zinput input
)
1403 (read-attribute-list zinput input nil
)))
1409 (defun read-entity-like (input)
1410 "Read an entity reference off the xstream `input'. Returns two values:
1411 either :ENTITY-REFERENCE <interned-rod> in case of a named entity
1412 or :CHARACTER-REFERENCE <integer> in case of character references.
1413 The initial #\\& is considered to be consumed already."
1414 (let ((c (peek-rune input
)))
1416 (eox input
"EOF after '&'"))
1418 (values :CHARACTER-REFERENCE
(read-character-reference input
)))
1420 (unless (name-start-rune-p (peek-rune input
))
1421 (wf-error input
"Expecting name after &."))
1422 (let ((name (read-name-token input
)))
1423 (setf c
(read-rune input
))
1424 (unless (rune= c
#/\
;)
1425 (wf-error input
"Expected \";\"."))
1426 (values :ENTITY-REFERENCE name
))))))
1428 (defun read-tag-2 (zinput input kind
)
1429 (let ((name (read-name-token input
))
1431 (setf atts
(read-attribute-list zinput input nil
))
1433 ;; check for double attributes
1434 (do ((q atts
(cdr q
)))
1436 (cond ((find (caar q
) (cdr q
) :key
#'car
)
1437 (wf-error zinput
"Attribute ~S has two definitions in element ~S."
1438 (rod-string (caar q
))
1439 (rod-string name
)))))
1441 (cond ((eq (peek-rune input
) #/>)
1442 (consume-rune input
)
1443 (values kind
(cons name atts
)))
1444 ((eq (peek-rune input
) #//)
1445 (consume-rune input
)
1446 (check-rune input
#/> (read-rune input
))
1447 (values :ztag
(cons name atts
)))
1449 (wf-error zinput
"syntax error in read-tag-2.")) )))
1451 (defun read-attribute (zinput input
)
1452 (unless (name-start-rune-p (peek-rune input
))
1453 (wf-error zinput
"Expected name."))
1454 ;; arg thanks to the post mortem nature of name space declarations,
1455 ;; we could only process the attribute values post mortem.
1456 (let ((name (read-name-token input
)))
1457 (while (let ((c (peek-rune input
)))
1458 (and (not (eq c
:eof
))
1459 (or (rune= c
#/U
+0020)
1462 (rune= c
#/U
+000D
))))
1463 (consume-rune input
))
1464 (unless (eq (read-rune input
) #/=)
1465 (wf-error zinput
"Expected \"=\"."))
1466 (while (let ((c (peek-rune input
)))
1467 (and (not (eq c
:eof
))
1468 (or (rune= c
#/U
+0020)
1471 (rune= c
#/U
+000D
))))
1472 (consume-rune input
))
1473 (cons name
(read-att-value-2 input
))))
1475 (defun canon-not-cdata-attval (value)
1476 ;; | If the declared value is not CDATA, then the XML processor must
1477 ;; | further process the normalized attribute value by discarding any
1478 ;; | leading and trailing space (#x20) characters, and by replacing
1479 ;; | sequences of space (#x20) characters by a single space (#x20)
1481 (with-rune-collector (collect)
1482 (let ((gimme-20 nil
)
1483 (anything-seen-p nil
))
1484 (map nil
(lambda (c)
1485 (cond ((rune= c
#/u
+0020)
1488 (when (and anything-seen-p gimme-20
)
1491 (setf anything-seen-p t
)
1495 (definline data-rune-p
(rune)
1496 ;; Any Unicode character, excluding FFFE, and FFFF.
1497 ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
1498 (let ((c (rune-code rune
)))
1499 (or (= c
#x9
) (= c
#xA
) (= c
#xD
)
1501 #+rune-is-utf-16
(<= #xD800 c
#xDFFF
)
1502 (<= #xE000 c
#xFFFD
)
1503 #-rune-is-utf-16
(<= #x10000 c
#x10FFFF
))))
1505 (defun read-att-value (zinput input mode
&optional canon-space-p
(delim nil
))
1506 (with-rune-collector-2 (collect)
1507 (labels ((muffle (input delim
)
1510 (setf c
(read-rune input
))
1511 (cond ((eql delim c
)
1516 (setf c
(peek-rune input
))
1520 (let ((c (read-character-reference input
)))
1521 (%put-unicode-char c collect
)))
1523 (unless (name-start-rune-p (peek-rune input
))
1524 (wf-error zinput
"Expecting name after &."))
1525 (let ((name (read-name-token input
)))
1526 (setf c
(read-rune input
))
1527 (check-rune input c
#/\
;)
1531 zinput name
:general
1533 (muffle (car (zstream-input-stack zinput
))
1537 ;; bypass, but never the less we
1538 ;; need to check for legal
1540 ;; Must it be defined?
1541 ;; allerdings: unparsed sind verboten
1543 (map nil
(lambda (x) (collect x
)) name
)
1544 (collect #/\
; )))))))
1545 ((and (eq mode
:ENT
) (rune= c
#/%
))
1546 (let ((d (peek-rune input
)))
1549 (unless (name-start-rune-p d
)
1550 (wf-error zinput
"Expecting name after %.")))
1551 (let ((name (read-name-token input
)))
1552 (setf c
(read-rune input
))
1553 (check-rune input c
#/\
;)
1554 (cond (*expand-pe-p
*
1556 zinput name
:parameter
1558 (muffle (car (zstream-input-stack zinput
))
1561 (wf-error zinput
"No PE here.")))))
1562 ((and (eq mode
:ATT
) (rune= c
#/<))
1563 (wf-error zinput
"unexpected #\/<"))
1564 ((and canon-space-p
(space-rune-p c
))
1566 ((not (data-rune-p c
))
1567 (wf-error zinput
"illegal char: ~S." c
))
1570 (declare (dynamic-extent #'muffle
))
1571 (muffle input
(or delim
1572 (let ((delim (read-rune input
)))
1573 (unless (member delim
'(#/\" #/\') :test
#'eql
)
1574 (wf-error zinput
"invalid attribute delimiter"))
1577 (defun read-character-reference (input)
1578 ;; The #/& is already read
1580 (let ((c (read-rune input
)))
1581 (check-rune input c
#/#)
1582 (setq c
(read-rune input
))
1587 (setq c
(read-rune input
))
1590 (unless (digit-rune-p c
16)
1591 (wf-error input
"garbage in character reference"))
1594 (with-output-to-string (sink)
1595 (write-char (rune-char c
) sink
)
1597 (setq c
(read-rune input
))
1600 (digit-rune-p c
16))
1601 (write-char (rune-char c
) sink
)))
1603 (check-rune input c
#/\
;)))
1608 (with-output-to-string (sink)
1609 (write-char (rune-char c
) sink
)
1611 (setq c
(read-rune input
))
1615 (write-char (rune-char c
) sink
)))
1617 (check-rune input c
#/\
;)))
1619 (wf-error input
"Bad char in numeric character entity."))))))
1620 (unless (code-data-char-p res
)
1623 "expansion of numeric character reference (#x~X) is no data char."
1627 (defun read-pi (input)
1628 ;; "<?" is already read
1630 (let ((c (peek-rune input
)))
1631 (unless (name-start-rune-p c
)
1632 (wf-error input
"Expecting name after '<?'"))
1633 (setf name
(read-name-token input
)))
1635 ((member (peek-rune input
) '(#/U
+0020 #/U
+0009 #/U
+000A
#/U
+000D
)
1637 (values name
(read-pi-content input
)))
1639 (unless (and (eql (read-rune input
) #/?
)
1640 (eql (read-rune input
) #/>))
1641 (wf-error input
"malformed processing instruction"))
1642 (values name
"")))))
1644 (defun read-pi-content (input)
1647 (with-rune-collector (collect)
1651 (setf d
(read-rune input
))
1654 (unless (data-rune-p d
)
1655 (wf-error input
"Illegal char: ~S." d
))
1656 (when (rune= d
#/?
) (go state-2
))
1660 (setf d
(read-rune input
))
1663 (unless (data-rune-p d
)
1664 (wf-error input
"Illegal char: ~S." d
))
1665 (when (rune= d
#/>) (return))
1673 (defun read-comment-content (input &aux d
)
1674 (with-rune-collector (collect)
1678 (setf d
(read-rune input
))
1681 (unless (data-rune-p d
)
1682 (wf-error input
"Illegal char: ~S." d
))
1683 (when (rune= d
#/-
) (go state-2
))
1687 (setf d
(read-rune input
))
1690 (unless (data-rune-p d
)
1691 (wf-error input
"Illegal char: ~S." d
))
1692 (when (rune= d
#/-
) (go state-3
))
1696 state-3
;; #/- #/- seen
1697 (setf d
(read-rune input
))
1700 (unless (data-rune-p d
)
1701 (wf-error input
"Illegal char: ~S." d
))
1702 (when (rune= d
#/>) (return))
1703 (wf-error input
"'--' not allowed in a comment")
1712 (defun read-cdata-sect (input &aux d
)
1713 ;; <![CDATA[ is already read
1714 ;; read anything up to ]]>
1715 (with-rune-collector (collect)
1719 (setf d
(read-rune input
))
1722 (unless (data-rune-p d
)
1723 (wf-error input
"Illegal char: ~S." d
))
1724 (when (rune= d
#/\
]) (go state-2
))
1728 (setf d
(read-rune input
))
1731 (unless (data-rune-p d
)
1732 (wf-error input
"Illegal char: ~S." d
))
1733 (when (rune= d
#/\
]) (go state-3
))
1737 state-3
;; #/\] #/\] seen
1738 (setf d
(read-rune input
))
1741 (unless (data-rune-p d
)
1742 (wf-error input
"Illegal char: ~S." d
))
1745 (when (rune= d
#/\
])
1753 ;; some character categories
1755 (defun space-rune-p (rune)
1756 (declare (type rune rune
))
1757 (or (rune= rune
#/U
+0020)
1758 (rune= rune
#/U
+0009)
1759 (rune= rune
#/U
+000A
)
1760 (rune= rune
#/U
+000D
)))
1762 (defun code-data-char-p (c)
1763 ;; Any Unicode character, excluding FFFE, and FFFF.
1764 ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
1765 (or (= c
#x9
) (= c
#xA
) (= c
#xD
)
1767 #+rune-is-utf-16
(<= #xD800 c
#xDFFF
)
1768 (<= #xE000 c
#xFFFD
)
1769 #-rune-is-utf-16
(<= #x10000 c
#x10FFFF
)))
1771 (defun pubid-char-p (c)
1772 (or (rune= c
#/u
+0020) (rune= c
#/u
+000D
) (rune= c
#/u
+000A
)
1776 (member c
'(#/-
#/' #/\
( #/\
) #/+ #/, #/.
#//
1777 #/: #/= #/?
#/\
; #/! #/* #/#
1781 (defun expect (input category
)
1782 (multiple-value-bind (cat sem
) (read-token input
)
1783 (unless (eq cat category
)
1784 (wf-error input
"Expected ~S saw ~S [~S]" category cat sem
))
1787 (defun consume-token (input)
1790 ;;;; ---------------------------------------------------------------------------
1795 ;; S ::= (#x20 | #x9 | #xD | #xA)+
1797 (while (eq (peek-token input
) :S
)
1798 (consume-token input
)))
1801 ;; S ::= (#x20 | #x9 | #xD | #xA)+
1802 (while (eq (peek-token input
) :S
)
1803 (consume-token input
)))
1805 (defun p/nmtoken
(input)
1806 (nth-value 1 (expect input
:nmtoken
)))
1808 (defun p/name
(input)
1809 (let ((result (p/nmtoken input
)))
1810 (unless (name-start-rune-p (elt result
0))
1811 (wf-error input
"Expected name."))
1814 (defun p/attlist-decl
(input)
1815 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name (S AttDef)* S? '>'
1817 (expect input
:|
<!ATTLIST|
)
1819 (setf elm-name
(p/nmtoken input
))
1821 (let ((tok (read-token input
)))
1825 (cond ((eq (peek-token input
) :>)
1826 (consume-token input
)
1829 (multiple-value-bind (name type default
) (p/attdef input
)
1830 (define-attribute (dtd *ctx
*) elm-name name type default
)) )))
1835 "Expected either another AttDef or end of \"<!ATTLIST\". -- saw ~S."
1838 (defun p/attdef
(input)
1839 ;; [53] AttDef ::= Name S AttType S DefaultDecl
1840 (let (name type default
)
1841 (setf name
(p/nmtoken input
))
1843 (setf type
(p/att-type input
))
1845 (setf default
(p/default-decl input
))
1846 (values name type default
)))
1848 (defun p/list
(input item-parser delimiter
)
1849 ;; Parse something like S? <item> (S? <delimiter> <item>)* S?
1851 (declare (type function item-parser
))
1854 (setf res
(list (funcall item-parser input
)))
1857 (cond ((eq (peek-token input
) delimiter
)
1858 (consume-token input
)
1860 (push (funcall item-parser input
) res
))
1866 (defun p/att-type
(input)
1867 ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType
1868 ;; [55] StringType ::= 'CDATA'
1869 ;; [56] TokenizedType ::= 'ID' /*VC: ID */
1870 ;; /*VC: One ID per Element Type */
1871 ;; /*VC: ID Attribute Default */
1872 ;; | 'IDREF' /*VC: IDREF */
1873 ;; | 'IDREFS' /*VC: IDREF */
1874 ;; | 'ENTITY' /*VC: Entity Name */
1875 ;; | 'ENTITIES' /*VC: Entity Name */
1876 ;; | 'NMTOKEN' /*VC: Name Token */
1877 ;; | 'NMTOKENS' /*VC: Name Token */
1878 ;; [57] EnumeratedType ::= NotationType | Enumeration
1879 ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1880 ;; /* VC: Notation Attributes */
1881 ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */
1882 (multiple-value-bind (cat sem
) (read-token input
)
1883 (cond ((eq cat
:nmtoken
)
1884 (cond ((rod= sem
'#.
(string-rod "CDATA")) :CDATA
)
1885 ((rod= sem
'#.
(string-rod "ID")) :ID
)
1886 ((rod= sem
'#.
(string-rod "IDREF")) :IDREFS
)
1887 ((rod= sem
'#.
(string-rod "IDREFS")) :IDREFS
)
1888 ((rod= sem
'#.
(string-rod "ENTITY")) :ENTITY
)
1889 ((rod= sem
'#.
(string-rod "ENTITIES")) :ENTITIES
)
1890 ((rod= sem
'#.
(string-rod "NMTOKEN")) :NMTOKEN
)
1891 ((rod= sem
'#.
(string-rod "NMTOKENS")) :NMTOKENS
)
1892 ((rod= sem
'#.
(string-rod "NOTATION"))
1896 (setf names
(p/list input
#'p
/nmtoken
:\|
))
1899 (setf (referenced-notations *ctx
*)
1900 (append names
(referenced-notations *ctx
*))))
1901 (cons :NOTATION names
)))
1903 (wf-error input
"In p/att-type: ~S ~S." cat sem
))))
1905 ;; XXX Die Nmtoken-Syntax pruefen wir derzeit nur beim Validieren.
1907 ;;(expect input :\()
1908 (setf names
(p/list input
#'p
/nmtoken
:\|
))
1910 (cons :ENUMERATION names
)))
1912 (wf-error input
"In p/att-type: ~S ~S." cat sem
)) )))
1914 (defun p/default-decl
(input)
1915 ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1916 ;; | (('#FIXED' S)? AttValue) /* VC: Required Attribute */
1918 ;; /* VC: Attribute Default Legal */
1919 ;; /* WFC: No < in Attribute Values */
1920 ;; /* VC: Fixed Attribute Default */
1921 (multiple-value-bind (cat sem
) (peek-token input
)
1922 (cond ((eq cat
:|
#REQUIRED|
)
1923 (consume-token input
) :REQUIRED
)
1924 ((eq cat
:|
#IMPLIED|
)
1925 (consume-token input
) :IMPLIED
)
1927 (consume-token input
)
1929 (list :FIXED
(p/att-value input
)))
1930 ((or (eq cat
:\') (eq cat
:\"))
1931 (list :DEFAULT
(p/att-value input
)))
1933 (wf-error input
"p/default-decl: ~S ~S." cat sem
)) )))
1936 ;; [70] EntityDecl ::= GEDecl | PEDecl
1937 ;; [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1938 ;; [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1939 ;; [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1940 ;; [74] PEDef ::= EntityValue | ExternalID
1941 ;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1942 ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
1943 ;; [76] NDataDecl ::= S 'NDATA' S Name /* VC: Notation Declared */
1945 (defun p/entity-decl
(input)
1946 (let (name def kind
)
1947 (expect input
:|
<!ENTITY|
)
1949 (cond ((eq (peek-token input
) :%
)
1950 (setf kind
:parameter
)
1951 (consume-token input
)
1954 (setf kind
:general
)))
1955 (setf name
(p/name input
))
1957 (setf def
(p/entity-def input kind
))
1958 (define-entity input name kind def
)
1960 (expect input
:\
>)))
1962 (defun report-entity (h kind name def
)
1965 (let ((extid (entdef-extid def
))
1966 (ndata (entdef-ndata def
)))
1968 (sax:unparsed-entity-declaration h
1970 (extid-public extid
)
1971 (uri-rod (extid-system extid
))
1973 (sax:external-entity-declaration h
1976 (extid-public extid
)
1977 (uri-rod (extid-system extid
))))))
1979 (sax:internal-entity-declaration h kind name
(entdef-value def
)))))
1981 (defun p/entity-def
(input kind
)
1982 (multiple-value-bind (cat sem
) (peek-token input
)
1983 (cond ((member cat
'(:\" :\'))
1984 (make-internal-entdef (p/entity-value input
)))
1985 ((and (eq cat
:nmtoken
)
1986 (or (rod= sem
'#.
(string-rod "SYSTEM"))
1987 (rod= sem
'#.
(string-rod "PUBLIC"))))
1989 (setf extid
(p/external-id input nil
))
1990 (when (eq kind
:general
) ;NDATA allowed at all?
1991 (cond ((eq (peek-token input
) :S
)
1993 (when (and (eq (peek-token input
) :nmtoken
)
1994 (rod= (nth-value 1 (peek-token input
))
1995 '#.
(string-rod "NDATA")))
1996 (consume-token input
)
1998 (setf ndata
(p/nmtoken input
))
2000 (push ndata
(referenced-notations *ctx
*)))))))
2001 (make-external-entdef extid ndata
)))
2003 (wf-error input
"p/entity-def: ~S / ~S." cat sem
)) )))
2005 (defun p/entity-value
(input)
2006 (let ((delim (if (eq (read-token input
) :\") #/\" #/\')))
2007 (read-att-value input
2008 (car (zstream-input-stack input
))
2013 (defun p/att-value
(input)
2014 (let ((delim (if (eq (read-token input
) :\") #/\" #/\')))
2015 (read-att-value input
2016 (car (zstream-input-stack input
))
2021 (defun p/external-id
(input &optional
(public-only-ok-p nil
))
2022 ;; xxx public-only-ok-p
2023 (multiple-value-bind (cat sem
) (read-token input
)
2024 (cond ((and (eq cat
:nmtoken
) (rod= sem
'#.
(string-rod "SYSTEM")))
2026 (make-extid nil
(p/system-literal input
)))
2027 ((and (eq cat
:nmtoken
) (rod= sem
'#.
(string-rod "PUBLIC")))
2030 (setf pub
(p/pubid-literal input
))
2031 (when (eq (peek-token input
) :S
)
2033 (when (member (peek-token input
) '(:\" :\'))
2034 (setf sys
(p/system-literal input
))))
2035 (when (and (not public-only-ok-p
)
2037 (wf-error input
"System identifier needed for this PUBLIC external identifier."))
2038 (make-extid pub sys
)))
2040 (wf-error input
"Expected external-id: ~S / ~S." cat sem
)))))
2043 ;; [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2044 ;; [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2045 ;; [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
2046 ;; | [-'()+,./:=?;!*#@$_%]
2049 (multiple-value-bind (cat) (read-token input
)
2050 (cond ((member cat
'(:\" :\'))
2051 (let ((delim (if (eq cat
:\") #/\" #/\')))
2052 (with-rune-collector (collect)
2054 (let ((c (read-rune (car (zstream-input-stack input
)))))
2056 (eox input
"EOF in system literal."))
2062 (wf-error input
"Expect either \" or \'.")))))
2064 ;; it is important to cache the orginal URI rod, since the re-serialized
2065 ;; uri-string can be different from the one parsed originally.
2066 (defun uri-rod (uri)
2068 (or (getf (puri:uri-plist uri
) 'original-rod
)
2069 (rod (puri:render-uri uri nil
)))
2072 (defun safe-parse-uri (str)
2073 ;; puri doesn't like strings starting with file:///, although that is a very
2074 ;; common is practise. Cut it away, we don't distinguish between scheme
2075 ;; :FILE and NIL anway.
2076 (when (eql (search "file://" str
) 0)
2077 (setf str
(subseq str
(length "file://"))))
2078 (puri:parse-uri
(coerce str
'simple-string
)))
2080 (defun p/system-literal
(input)
2081 (let* ((rod (p/id input
))
2082 (result (safe-parse-uri (rod-string rod
))))
2083 (setf (getf (puri:uri-plist result
) 'original-rod
) rod
)
2086 (defun p/pubid-literal
(input)
2087 (let ((result (p/id input
)))
2088 (unless (every #'pubid-char-p result
)
2089 (wf-error input
"Illegal pubid: ~S." (rod-string result
)))
2095 (defun p/element-decl
(input)
2097 (expect input
:|
<!ELEMENT|
)
2099 (setf name
(p/nmtoken input
))
2101 (setf content
(normalize-mixed-cspec (p/cspec input
)))
2102 (unless (legal-content-model-p content
*validate
*)
2103 (wf-error input
"Malformed or invalid content model: ~S." (mu content
)))
2106 (define-element (dtd *ctx
*) name content
)
2107 (list :element name content
)))
2109 (defun maybe-compile-cspec (e)
2110 (or (elmdef-compiled-cspec e
)
2111 (setf (elmdef-compiled-cspec e
)
2112 (let ((cspec (elmdef-content e
)))
2114 (validity-error "(03) Element Valid: no definition for ~A"
2115 (rod-string (elmdef-name e
))))
2116 (multiple-value-call #'cons
2117 (compile-cspec cspec
(standalone-check-necessary-p e
)))))))
2119 (defun make-root-model (name)
2120 (cons (lambda (actual-name)
2121 (if (rod= actual-name name
)
2126 ;;; content spec validation:
2128 ;;; Given a `contentspec', COMPILE-CSPEC returns as multiple values two
2129 ;;; functions A and B of one argument to be called for every
2130 ;;; A. child element
2131 ;;; B. text child node
2133 ;;; Function A will be called with
2134 ;;; - the element name rod as its argument. If that element may appear
2135 ;;; at the current position, a new function to be called for the next
2136 ;;; child is returned. Otherwise NIL is returned.
2137 ;;; - argument NIL at the end of the element, it must then return T or NIL
2138 ;;; to indicate whether the end tag is valid.
2140 ;;; Function B will be called with the character data rod as its argument, it
2141 ;;; returns a boolean indicating whether this text node is allowed.
2143 ;;; That is, if one of the functions ever returns NIL, the node is
2144 ;;; rejected as invalid.
2146 (defun cmodel-done (actual-value)
2147 (null actual-value
))
2149 (defun compile-cspec (cspec &optional standalone-check
)
2153 (:EMPTY
(values #'cmodel-done
(constantly nil
)))
2154 (:PCDATA
(values #'cmodel-done
(constantly t
)))
2156 (values (labels ((doit (name) (if name
#'doit t
))) #'doit
)
2158 ((and (eq (car cspec
) '*)
2159 (let ((subspec (second cspec
)))
2160 (and (eq (car subspec
) 'or
) (eq (cadr subspec
) :PCDATA
))))
2161 (values (compile-mixed (second cspec
))
2164 (values (compile-content-model cspec
)
2166 (when standalone-check
2167 (validity-error "(02) Standalone Document Declaration: whitespace"))
2168 (every #'white-space-rune-p rod
))))))
2170 (defun compile-mixed (cspec)
2171 ;; das koennten wir theoretisch auch COMPILE-CONTENT-MODEL erledigen lassen
2172 (let ((allowed-names (cddr cspec
)))
2173 (labels ((doit (actual-name)
2175 ((null actual-name
) t
)
2176 ((member actual-name allowed-names
:test
#'rod
=) #'doit
)
2180 (defun compile-content-model (cspec &optional
(continuation #'cmodel-done
))
2182 (lambda (actual-name)
2183 (if (and actual-name
(rod= cspec actual-name
))
2188 (labels ((traverse (seq)
2189 (compile-content-model (car seq
)
2191 (traverse (cdr seq
))
2193 (traverse (cdr cspec
))))
2195 (let ((options (mapcar (rcurry #'compile-content-model continuation
)
2197 (lambda (actual-name)
2198 (some (rcurry #'funcall actual-name
) options
))))
2200 (let ((maybe (compile-content-model (second cspec
) continuation
)))
2201 (lambda (actual-name)
2202 (or (funcall maybe actual-name
)
2203 (funcall continuation actual-name
)))))
2205 (let (maybe-continuation)
2206 (labels ((recurse (actual-name)
2207 (if (null actual-name
)
2208 (funcall continuation actual-name
)
2209 (or (funcall maybe-continuation actual-name
)
2210 (funcall continuation actual-name
)))))
2211 (setf maybe-continuation
2212 (compile-content-model (second cspec
) #'recurse
))
2215 (let ((it (cadr cspec
)))
2216 (compile-content-model `(and ,it
(* ,it
)) continuation
))))))
2218 (defun setp (list &key
(test 'eql
))
2219 (equal list
(remove-duplicates list
:test test
)))
2221 (defun legal-content-model-p (cspec &optional validate
)
2222 (or (eq cspec
:PCDATA
)
2227 (consp (cadr cspec
))
2228 (eq (car (cadr cspec
)) 'or
)
2229 (eq (cadr (cadr cspec
)) :PCDATA
)
2230 (every #'vectorp
(cddr (cadr cspec
)))
2231 (if (and validate
(not (setp (cddr (cadr cspec
)) :test
#'rod
=)))
2232 (validity-error "VC: No Duplicate Types (07)")
2235 (cond ((member x
'(:PCDATA
:ANY
:EMPTY
))
2238 ((and (walk (car x
))
2242 ;; wir fahren besser, wenn wir machen:
2244 ;; cspec ::= 'EMPTY' | 'ANY' | '#PCDATA'
2247 ;; cs ::= '(' S? cspec ( S? '|' S? cspec)* S? ')' ('?' | '*' | '+')?
2248 ;; und eine post factum analyse
2250 (defun p/cspec
(input &optional recursivep
)
2252 (let ((names nil
) op-cat op res stream
)
2253 (multiple-value-bind (cat sem
) (peek-token input
)
2254 (cond ((eq cat
:nmtoken
)
2255 (consume-token input
)
2256 (cond ((rod= sem
'#.
(string-rod "EMPTY"))
2258 ((rod= sem
'#.
(string-rod "ANY"))
2261 (wf-error input
"invalid content spec"))
2265 (consume-token input
)
2268 (setf stream
(car (zstream-input-stack input
)))
2269 (consume-token input
)
2271 (setq names
(list (p/cspec input t
)))
2273 (cond ((member (peek-token input
) '(:\|
:\
,))
2274 (setf op-cat
(peek-token input
))
2275 (setf op
(if (eq op-cat
:\
,) 'and
'or
))
2276 (while (eq (peek-token input
) op-cat
)
2277 (consume-token input
)
2279 (push (p/cspec input t
) names
)
2281 (setf res
(cons op
(reverse names
))))
2283 (setf res
(cons 'and names
))))
2287 (unless (eq stream
(car (zstream-input-stack input
)))
2288 (validity-error "(06) Proper Group/PE Nesting")))
2291 (wf-error input
"p/cspec - ~s / ~s" cat sem
)))))))
2292 (cond ((eq (peek-token input
) :?
) (consume-token input
) (list '? term
))
2293 ((eq (peek-token input
) :+) (consume-token input
) (list '+ term
))
2294 ((eq (peek-token input
) :*) (consume-token input
) (list '* term
))
2298 (defun normalize-mixed-cspec (cspec)
2299 ;; der Parser oben funktioniert huebsch fuer die children-Regel, aber
2300 ;; fuer Mixed ist das Ergebnis nicht praktisch, denn dort wollen wir
2301 ;; eigentlich auf eine Liste von Namen in einheitlichem Format hinaus.
2302 ;; Dazu normalisieren wir einfach in eine der beiden folgenden Formen:
2303 ;; (* (or :PCDATA ...rods...)) -- und zwar exakt so!
2304 ;; :PCDATA -- sonst ganz trivial
2305 (flet ((trivialp (c)
2307 (and (eq (car c
) 'and
)
2308 (eq (cadr c
) :PCDATA
)
2310 (if (or (trivialp cspec
) ;(and PCDATA)
2311 (and (consp cspec
) ;(* (and PCDATA))
2312 (and (eq (car cspec
) '*)
2314 (trivialp (cadr cspec
)))))
2318 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2321 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDefs S? '>'
2322 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name S? '>'
2323 ;; [53] AttDefs ::= S Name S AttType S DefaultDecl AttDefs
2326 (defun p/notation-decl
(input)
2328 (expect input
:|
<!NOTATION|
)
2330 (setf name
(p/name input
))
2332 (setf id
(p/external-id input t
))
2335 (sax:notation-declaration
(handler *ctx
*)
2337 (if (extid-public id
)
2338 (normalize-public-id (extid-public id
))
2340 (uri-rod (extid-system id
)))
2341 (when (and sax
:*namespace-processing
* (find #/: name
))
2342 (wf-error input
"colon in notation name"))
2344 (define-notation (dtd *ctx
*) name id
))
2345 (list :notation-decl name id
)))
2347 (defun normalize-public-id (rod)
2348 (with-rune-collector (collect)
2349 (let ((gimme-20 nil
)
2350 (anything-seen-p nil
))
2351 (map nil
(lambda (c)
2353 ((or (rune= c
#/u
+0009)
2359 (when (and anything-seen-p gimme-20
)
2362 (setf anything-seen-p t
)
2368 (defun p/conditional-sect
(input)
2369 (expect input
:<!\
[ )
2370 (let ((stream (car (zstream-input-stack input
))))
2372 (multiple-value-bind (cat sem
) (read-token input
)
2373 (cond ((and (eq cat
:nmtoken
)
2374 (rod= sem
'#.
(string-rod "INCLUDE")))
2375 (p/include-sect input stream
))
2376 ((and (eq cat
:nmtoken
)
2377 (rod= sem
'#.
(string-rod "IGNORE")))
2378 (p/ignore-sect input stream
))
2380 (wf-error input
"Expected INCLUDE or IGNORE after \"<![\"."))))))
2382 (defun p/cond-expect
(input cat initial-stream
)
2385 (unless (eq (car (zstream-input-stack input
)) initial-stream
)
2386 (validity-error "(21) Proper Conditional Section/PE Nesting"))))
2388 (defun p/include-sect
(input initial-stream
)
2389 ;; <![INCLUDE is already read.
2391 (p/cond-expect input
:\
[ initial-stream
)
2392 (p/ext-subset-decl input
)
2393 (p/cond-expect input
:\
] initial-stream
)
2394 (p/cond-expect input
:\
] initial-stream
)
2395 (p/cond-expect input
:\
> initial-stream
))
2397 (defun p/ignore-sect
(input initial-stream
)
2398 ;; <![IGNORE is already read.
2399 ;; XXX Is VC 21 being checked for nested sections?
2401 (p/cond-expect input
:\
[ initial-stream
)
2402 (let ((input (car (zstream-input-stack input
))))
2404 (do ((c1 (read-rune input
) (read-rune input
))
2408 (declare (type fixnum level
))
2410 (eox input
"EOF in <![IGNORE ... >")))
2411 (cond ((and (rune= c3
#/<) (rune= c2
#/!) (rune= c1
#/\
[))
2413 (cond ((and (rune= c3
#/\
]) (rune= c2
#/\
]) (rune= c1
#/>))
2415 (unless (eq (car (zstream-input-stack input
)) initial-stream
)
2416 (validity-error "(21) Proper Conditional Section/PE Nesting")))
2418 (defun p/ext-subset-decl
(input)
2419 ;; ( markupdecl | conditionalSect | S )*
2421 (case (let ((*expand-pe-p
* nil
)) (peek-token input
))
2422 (:|
<![|
(let ((*expand-pe-p
* t
)) (p/conditional-sect input
)))
2423 (:S
(consume-token input
))
2425 ((:|
<!ELEMENT|
:|
<!ATTLIST|
:|
<!ENTITY|
:|
<!NOTATION|
:PI
:COMMENT
)
2426 (let ((*expand-pe-p
* t
)
2427 (*external-subset-p
* t
))
2428 (p/markup-decl input
)))
2430 (let ((name (nth-value 1 (read-token input
))))
2431 (recurse-on-entity input name
:parameter
2433 (etypecase (checked-get-entdef name
:parameter
)
2435 (p/ext-subset input
))
2437 (p/ext-subset-decl input
)))
2438 (unless (eq :eof
(peek-token input
))
2439 (wf-error input
"Trailing garbage."))))))
2440 (otherwise (return)))) )
2442 (defun p/markup-decl
(input)
2444 (let ((stream (car (zstream-input-stack input
))))
2445 (multiple-value-prog1
2446 (p/markup-decl-unsafe input
)
2448 (unless (eq stream
(car (zstream-input-stack input
)))
2449 (validity-error "(01) Proper Declaration/PE Nesting"))))))
2451 (defun p/markup-decl-unsafe
(input)
2452 ;; markupdecl ::= elementdecl | AttlistDecl /* VC: Proper Declaration/PE Nesting */
2453 ;; | EntityDecl | NotationDecl
2454 ;; | PI | Comment /* WFC: PEs in Internal Subset */
2455 (let ((token (peek-token input
))
2456 (*expand-pe-p
* (and *expand-pe-p
* *external-subset-p
*)))
2458 (:|
<!ELEMENT|
(p/element-decl input
))
2459 (:|
<!ATTLIST|
(p/attlist-decl input
))
2460 (:|
<!ENTITY|
(p/entity-decl input
))
2461 (:|
<!NOTATION|
(p/notation-decl input
))
2463 (let ((sem (nth-value 1 (read-token input
))))
2464 (sax:processing-instruction
(handler *ctx
*) (car sem
) (cdr sem
))))
2465 (:COMMENT
(consume-token input
))
2467 (wf-error input
"p/markup-decl ~S" (peek-token input
))))))
2469 (defun setup-encoding (input xml-header
)
2470 (when (xml-header-encoding xml-header
)
2471 (let ((enc (find-encoding (xml-header-encoding xml-header
))))
2473 (setf (xstream-encoding (car (zstream-input-stack input
)))
2476 (warn "There is no such encoding: ~S." (xml-header-encoding xml-header
)))))))
2478 (defun set-full-speed (input)
2479 (let ((xstream (car (zstream-input-stack input
))))
2481 (set-to-full-speed xstream
))))
2483 (defun p/ext-subset
(input)
2484 (cond ((eq (peek-token input
) :xml-decl
)
2485 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input
))))))
2486 (setup-encoding input hd
))
2487 (consume-token input
)))
2488 (set-full-speed input
)
2489 (p/ext-subset-decl input
)
2490 (unless (eq (peek-token input
) :eof
)
2491 (wf-error input
"Trailing garbage - ~S." (peek-token input
))))
2493 (defvar *catalog
* nil
)
2495 (defun extid-using-catalog (extid)
2498 (resolve-extid (extid-public extid
)
2499 (extid-system extid
)
2502 (make-extid nil sysid
)
2506 (defun p/doctype-decl
(input &optional dtd-extid
)
2508 (let ((*expand-pe-p
* nil
)
2510 (expect input
:|
<!DOCTYPE|
)
2512 (setq name
(p/nmtoken input
))
2514 (setf (model-stack *ctx
*) (list (make-root-model name
))))
2515 (when (eq (peek-token input
) :S
)
2517 (unless (or (eq (peek-token input
) :\
[ )
2518 (eq (peek-token input
) :\
> ))
2519 (setf extid
(p/external-id input t
))))
2521 (setf extid dtd-extid
))
2523 (sax:start-dtd
(handler *ctx
*)
2525 (and extid
(extid-public extid
))
2526 (and extid
(uri-rod (extid-system extid
))))
2527 (when (eq (peek-token input
) :\
[ )
2528 (when (disallow-internal-subset *ctx
*)
2529 (wf-error input
"document includes an internal subset"))
2531 (consume-token input
)
2532 (sax:start-internal-subset
(handler *ctx
*))
2533 (while (progn (p/S? input
)
2534 (not (eq (peek-token input
) :\
] )))
2535 (if (eq (peek-token input
) :PE-REFERENCE
)
2536 (let ((name (nth-value 1 (read-token input
))))
2537 (recurse-on-entity input name
:parameter
2539 (etypecase (checked-get-entdef name
:parameter
)
2541 (p/ext-subset input
))
2543 (p/ext-subset-decl input
)))
2544 (unless (eq :eof
(peek-token input
))
2545 (wf-error input
"Trailing garbage.")))))
2546 (let ((*expand-pe-p
* t
))
2547 (p/markup-decl input
))))
2548 (consume-token input
)
2549 (sax:end-internal-subset
(handler *ctx
*))
2553 (let* ((effective-extid
2554 (extid-using-catalog (absolute-extid input extid
)))
2555 (sysid (extid-system effective-extid
))
2556 (fresh-dtd-p (null (dtd *ctx
*)))
2559 (not (standalone-p *ctx
*))
2560 (getdtd sysid
*dtd-cache
*))))
2563 (setf (dtd *ctx
*) cached-dtd
)
2564 (report-cached-dtd cached-dtd
))
2566 (let ((xi2 (xstream-open-extid effective-extid
)))
2567 (with-zstream (zi2 :input-stack
(list xi2
))
2570 (when (and fresh-dtd-p
2573 (not (standalone-p *ctx
*)))
2574 (setf (getdtd sysid
*dtd-cache
*) (dtd *ctx
*)))))))))
2575 (sax:end-dtd
(handler *ctx
*))
2576 (let ((dtd (dtd *ctx
*)))
2577 (sax:entity-resolver
2579 (lambda (name handler
) (resolve-entity name handler dtd
)))
2580 (sax::dtd
(handler *ctx
*) dtd
))
2581 (list :DOCTYPE name extid
))))
2583 (defun report-cached-dtd (dtd)
2584 (maphash (lambda (k v
)
2585 (report-entity (handler *ctx
*) :general k
(cdr v
)))
2586 (dtd-gentities dtd
))
2587 (maphash (lambda (k v
)
2588 (report-entity (handler *ctx
*) :parameter k
(cdr v
)))
2589 (dtd-pentities dtd
))
2590 (maphash (lambda (k v
)
2591 (sax:notation-declaration
2594 (if (extid-public v
)
2595 (normalize-public-id (extid-public v
))
2597 (uri-rod (extid-system v
))))
2598 (dtd-notations dtd
)))
2600 (defun p/misc
*-
2 (input)
2602 (while (member (peek-token input
) '(:COMMENT
:PI
:S
))
2603 (case (peek-token input
)
2605 (sax:comment
(handler *ctx
*) (nth-value 1 (peek-token input
))))
2607 (sax:processing-instruction
2609 (car (nth-value 1 (peek-token input
)))
2610 (cdr (nth-value 1 (peek-token input
))))))
2611 (consume-token input
)))
2615 &key validate dtd root entity-resolver disallow-internal-subset
2617 ;; check types of user-supplied arguments for better error messages:
2618 (check-type validate boolean
)
2619 (check-type recode boolean
)
2620 (check-type dtd
(or null extid
))
2621 (check-type root
(or null rod
))
2622 (check-type entity-resolver
(or null function symbol
))
2623 (check-type disallow-internal-subset boolean
)
2626 (setf handler
(make-recoder handler
#'rod-to-utf8-string
)))
2627 (let* ((xstream (car (zstream-input-stack input
)))
2628 (name (xstream-name xstream
))
2629 (base (when name
(stream-name-uri name
)))
2631 (make-context :handler handler
2633 :entity-resolver entity-resolver
2634 :base-stack
(list (or base
""))
2635 :disallow-internal-subset disallow-internal-subset
))
2636 (*validate
* validate
)
2637 (*namespace-bindings
* *initial-namespace-bindings
*))
2638 (sax:register-sax-parser handler
(make-instance 'cxml-parser
:ctx
*ctx
*))
2639 (sax:start-document handler
)
2640 ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
2641 ;; Misc ::= Comment | PI | S
2642 ;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
2643 ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
2644 (let ((*data-behaviour
* :DTD
))
2645 ;; optional XMLDecl?
2649 ;; (doctypedecl Misc*)?
2651 ((eq (peek-token input
) :<!DOCTYPE
)
2652 (p/doctype-decl input dtd
)
2655 (synthesize-doctype dtd input
))
2656 ((and validate
(not dtd
))
2657 (validity-error "invalid document: no doctype")))
2659 ;; Override expected root element if asked to
2661 (setf (model-stack *ctx
*) (list (make-root-model root
))))
2663 (let ((*data-behaviour
* :DOC
))
2669 (sax:end-document handler
))))
2671 (defun synthesize-doctype (dtd input
)
2672 (let ((dummy (string->xstream
"<!DOCTYPE dummy>")))
2673 (setf (xstream-name dummy
)
2675 :entity-name
"dummy doctype"
2677 :uri
(zstream-base-sysid input
)))
2678 (with-zstream (zstream :input-stack
(list dummy
))
2679 (p/doctype-decl zstream dtd
))))
2681 (defun fix-seen-< (input)
2682 (when (eq (peek-token input
) :seen-
<)
2683 (multiple-value-bind (c s
)
2684 (read-token-after-|
<| input
(car (zstream-input-stack input
)))
2685 (setf (zstream-token-category input
) c
2686 (zstream-token-semantic input
) s
))))
2688 (defun p/xmldecl
(input)
2689 ;; we will use the attribute-value parser for the xml decl.
2691 (when (eq (peek-token input
) :xml-decl
)
2692 (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input
))))))
2693 (setf (standalone-p *ctx
*) (eq (xml-header-standalone-p hd
) :yes
))
2694 (setup-encoding input hd
)
2697 (set-full-speed input
)))
2699 (defun p/eof
(input)
2700 (unless (eq (peek-token input
) :eof
)
2701 (wf-error input
"Garbage at end of document."))
2703 (maphash (lambda (k v
)
2705 (validity-error "(11) IDREF: ~S not defined" (rod-string k
))))
2708 (dolist (name (referenced-notations *ctx
*))
2709 (unless (find-notation name
(dtd *ctx
*))
2710 (validity-error "(23) Notation Declared: ~S" (rod-string name
))))))
2712 (defun p/element
(input)
2713 (multiple-value-bind (cat n-b new-b uri lname qname attrs
) (p/sztag input
)
2714 (sax:start-element
(handler *ctx
*) uri lname qname attrs
)
2715 (when (eq cat
:stag
)
2716 (let ((*namespace-bindings
* n-b
))
2718 (p/etag input qname
))
2719 (sax:end-element
(handler *ctx
*) uri lname qname
)
2720 (undeclare-namespaces new-b
)
2721 (pop (base-stack *ctx
*))
2722 (validate-end-element *ctx
* qname
)))
2724 (defun p/sztag
(input)
2725 (multiple-value-bind (cat sem
) (read-token input
)
2729 (t (wf-error input
"element expected")))
2730 (destructuring-bind (&optional name
&rest raw-attrs
) sem
2731 (validate-start-element *ctx
* name
)
2733 (process-attributes *ctx
* name
(build-attribute-list raw-attrs
)))
2734 (*namespace-bindings
* *namespace-bindings
*)
2736 (when sax
:*namespace-processing
*
2737 (setf new-namespaces
(declare-namespaces attrs
))
2738 (mapc #'set-attribute-namespace attrs
))
2739 (push (compute-base attrs
) (base-stack *ctx
*))
2740 (multiple-value-bind (uri prefix local-name
)
2741 (if sax
:*namespace-processing
*
2743 (values nil nil nil
))
2744 (declare (ignore prefix
))
2745 (check-attribute-uniqueness attrs
)
2746 (unless (or sax
:*include-xmlns-attributes
*
2747 (null sax
:*namespace-processing
*))
2749 (remove-if (compose #'xmlns-attr-p
#'sax
:attribute-qname
)
2752 *namespace-bindings
*
2754 uri local-name name attrs
))))))
2756 (defun p/etag
(input qname
)
2757 (multiple-value-bind (cat2 sem2
) (read-token input
)
2758 (unless (and (eq cat2
:etag
)
2759 (eq (car sem2
) qname
))
2760 (wf-error input
"Bad nesting. ~S / ~S"
2762 (mu (cons cat2 sem2
))))
2764 (wf-error input
"no attributes allowed in end tag"))))
2766 ;; copy&paste from cxml-rng
2767 (defun escape-uri (string)
2768 (with-output-to-string (out)
2769 (loop for c across
(cxml::rod-to-utf8-string string
) do
2770 (let ((code (char-code c
)))
2771 ;; http://www.w3.org/TR/xlink/#link-locators
2772 (if (or (>= code
127) (<= code
32) (find c
"<>\"{}|\\^`"))
2773 (format out
"%~2,'0X" code
)
2774 (write-char c out
))))))
2776 (defun compute-base (attrs)
2777 (let ((new (sax:find-attribute
#"xml:base" attrs
))
2778 (current (car (base-stack *ctx
*))))
2780 (puri:merge-uris
(escape-uri (sax:attribute-value new
)) current
)
2783 (defun process-characters (input sem
)
2784 (consume-token input
)
2785 (when (search #"]]>" sem
)
2786 (wf-error input
"']]>' not allowed in CharData"))
2787 (validate-characters *ctx
* sem
))
2789 (defun process-cdata-section (input)
2790 (consume-token input
)
2791 (let ((input (car (zstream-input-stack input
))))
2792 (unless (and (rune= #/C
(read-rune input
))
2793 (rune= #/D
(read-rune input
))
2794 (rune= #/A
(read-rune input
))
2795 (rune= #/T
(read-rune input
))
2796 (rune= #/A
(read-rune input
))
2797 (rune= #/\
[ (read-rune input
)))
2798 (wf-error input
"After '<![', 'CDATA[' is expected."))
2799 (validate-characters *ctx
* #"hack") ;anything other than whitespace
2800 (read-cdata-sect input
)))
2802 (defun p/content
(input)
2803 ;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
2805 (multiple-value-bind (cat sem
) (peek-token input
)
2810 (process-characters input sem
)
2811 (sax:characters
(handler *ctx
*) sem
))
2814 (consume-token input
)
2815 (recurse-on-entity input name
:general
2818 (etypecase (checked-get-entdef name
:general
)
2819 (internal-entdef (p/content input
))
2820 (external-entdef (p/ext-parsed-ent input
)))
2821 (unless (eq (peek-token input
) :eof
)
2822 (wf-error input
"Trailing garbage. - ~S"
2823 (peek-token input
))))))))
2825 (let ((data (process-cdata-section input
)))
2826 (sax:start-cdata
(handler *ctx
*))
2827 (sax:characters
(handler *ctx
*) data
)
2828 (sax:end-cdata
(handler *ctx
*))))
2830 (consume-token input
)
2831 (sax:processing-instruction
(handler *ctx
*) (car sem
) (cdr sem
)))
2833 (consume-token input
)
2834 (sax:comment
(handler *ctx
*) sem
))
2838 ;; [78] extParsedEnt ::= TextDecl? contentw
2839 ;; [79] extPE ::= TextDecl? extSubsetDecl
2841 (defstruct xml-header
2846 (defun p/ext-parsed-ent
(input)
2847 ;; [78] extParsedEnt ::= '<?xml' VersionInfo? EncodingDecl S? '?>' content
2848 (when (eq (peek-token input
) :xml-decl
)
2849 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input
))))))
2850 (setup-encoding input hd
))
2851 (consume-token input
))
2852 (set-full-speed input
)
2855 (defun parse-xml-decl (content)
2856 (let* ((res (make-xml-header))
2857 (i (make-rod-xstream content
)))
2858 (with-zstream (z :input-stack
(list i
))
2859 (let ((atts (read-attribute-list z i t
)))
2860 (unless (eq (peek-rune i
) :eof
)
2861 (wf-error i
"Garbage at end of XMLDecl."))
2862 ;; versioninfo muss da sein
2863 ;; dann ? encodingdecl
2866 (unless (eq (caar atts
) (intern-name '#.
(string-rod "version")))
2867 (wf-error i
"XMLDecl needs version."))
2868 (unless (and (>= (length (cdar atts
)) 1)
2870 (or (rune<= #/a x
#/z
)
2878 (wf-error i
"Bad XML version number: ~S." (rod-string (cdar atts
))))
2879 (setf (xml-header-version res
) (rod-string (cdar atts
)))
2881 (when (eq (caar atts
) (intern-name '#.
(string-rod "encoding")))
2882 (unless (and (>= (length (cdar atts
)) 1)
2884 (or (rune<= #/a x
#/z
)
2892 (or (rune<= #/a x
#/z
)
2893 (rune<= #/A x
#/Z
)))
2894 (aref (cdar atts
) 0)))
2895 (wf-error i
"Bad XML encoding name: ~S." (rod-string (cdar atts
))))
2896 (setf (xml-header-encoding res
) (rod-string (cdar atts
)))
2898 (when (eq (caar atts
) (intern-name '#.
(string-rod "standalone")))
2899 (unless (or (rod= (cdar atts
) '#.
(string-rod "yes"))
2900 (rod= (cdar atts
) '#.
(string-rod "no")))
2901 (wf-error i
"XMLDecl's 'standalone' attribute must be exactly \"yes\" or \"no\" and not ~S."
2902 (rod-string (cdar atts
))))
2903 (setf (xml-header-standalone-p res
)
2904 (if (rod-equal '#.
(string-rod "yes") (cdar atts
))
2909 (wf-error i
"Garbage in XMLDecl: ~A" (rod-string content
)))
2912 (defun parse-text-decl (content)
2913 (let* ((res (make-xml-header))
2914 (i (make-rod-xstream content
)))
2915 (with-zstream (z :input-stack
(list i
))
2916 (let ((atts (read-attribute-list z i t
)))
2917 (unless (eq (peek-rune i
) :eof
)
2918 (wf-error i
"Garbage at end of TextDecl"))
2919 ;; versioninfo optional
2920 ;; encodingdecl muss da sein
2922 (when (eq (caar atts
) (intern-name '#.
(string-rod "version")))
2923 (unless (and (>= (length (cdar atts
)) 1)
2925 (or (rune<= #/a x
#/z
)
2933 (wf-error i
"Bad XML version number: ~S." (rod-string (cdar atts
))))
2934 (setf (xml-header-version res
) (rod-string (cdar atts
)))
2936 (unless (eq (caar atts
) (intern-name '#.
(string-rod "encoding")))
2937 (wf-error i
"TextDecl needs encoding."))
2938 (unless (and (>= (length (cdar atts
)) 1)
2940 (or (rune<= #/a x
#/z
)
2948 (or (rune<= #/a x
#/z
)
2950 (rune<= #/0 x
#/9)))
2951 (aref (cdar atts
) 0)))
2952 (wf-error i
"Bad XML encoding name: ~S." (rod-string (cdar atts
))))
2953 (setf (xml-header-encoding res
) (rod-string (cdar atts
)))
2956 (wf-error i
"Garbage in TextDecl: ~A" (rod-string content
)))))
2959 ;;;; ---------------------------------------------------------------------------
2964 (cond ((stringp x
) x
)
2965 ((vectorp x
) (rod-string x
))
2967 (cons (mu (car x
)) (mu (cdr x
))))
2970 ;;;; ---------------------------------------------------------------------------
2971 ;;;; User interface ;;;;
2973 #-cxml-system
::uri-is-namestring
2974 (defun specific-or (component &optional
(alternative nil
))
2975 (if (eq component
:unspecific
)
2979 (defun string-or (str &optional
(alternative nil
))
2980 (if (zerop (length str
))
2984 #-cxml-system
::uri-is-namestring
2985 (defun make-uri (&rest initargs
&key path query
&allow-other-keys
)
2986 (apply #'make-instance
2988 :path
(and path
(escape-path path
))
2989 :query
(and query
(escape-query query
))
2992 #-cxml-system
::uri-is-namestring
2993 (defun escape-path (list)
2994 (puri::render-parsed-path list t
))
2996 #-cxml-system
::uri-is-namestring
2997 (defun escape-query (pairs)
2998 (flet ((escape (str)
2999 (puri::encode-escaped-encoding str puri
::*reserved-characters
* t
)))
3001 (with-output-to-string (s)
3002 (dolist (pair pairs
)
3006 (write-string (escape (car pair
)) s
)
3008 (write-string (escape (cdr pair
)) s
))))))
3010 #-cxml-system
::uri-is-namestring
3011 (defun uri-parsed-query (uri)
3012 (flet ((unescape (str)
3013 (puri::decode-escaped-encoding str t puri
::*reserved-characters
*)))
3014 (let ((str (puri:uri-query uri
)))
3018 (dolist (s (split-sequence-if (lambda (x) (eql x
#\
&)) str
))
3019 (destructuring-bind (name value
)
3020 (split-sequence-if (lambda (x) (eql x
#\
=)) s
)
3021 (push (cons (unescape name
) (unescape value
)) pairs
)))
3026 #-cxml-system
::uri-is-namestring
3027 (defun query-value (name alist
)
3028 (cdr (assoc name alist
:test
#'equal
)))
3030 #-cxml-system
::uri-is-namestring
3031 (defun pathname-to-uri (pathname)
3033 ;; FIXME: should we really leave ".." in base URIs?
3034 (append (mapcar (lambda (x)
3035 (cond ((member x
'(:up
:back
)) "..")
3037 (pathname-directory pathname
))
3039 (if (specific-or (pathname-type pathname
))
3040 (concatenate 'string
3041 (pathname-name pathname
)
3043 (pathname-type pathname
))
3044 (pathname-name pathname
))))))
3045 (if (eq (car path
) :relative
)
3046 (make-uri :path path
)
3047 (make-uri :scheme
:file
3048 :host
(concatenate 'string
3049 (string-or (host-namestring pathname
))
3051 (specific-or (pathname-device pathname
)))
3054 #+cxml-system
::uri-is-namestring
3055 (defun pathname-to-uri (pathname)
3056 (puri:parse-uri
(namestring pathname
)))
3058 #-cxml-system
::uri-is-namestring
3059 (defun parse-name.type
(str)
3061 (let ((i (position #\. str
:from-end t
)))
3063 (values (subseq str
0 i
) (subseq str
(1+ i
)))
3067 #-cxml-system
::uri-is-namestring
3068 (defun uri-to-pathname (uri)
3069 (let ((scheme (puri:uri-scheme uri
))
3070 (path (puri:uri-parsed-path uri
)))
3071 (unless (member scheme
'(nil :file
))
3072 (error 'xml-parse-error
3073 :format-control
"URI scheme ~S not supported"
3074 :format-arguments
(list scheme
)))
3075 (if (eq (car path
) :relative
)
3076 (multiple-value-bind (name type
)
3077 (parse-name.type
(car (last path
)))
3078 (make-pathname :directory
(butlast path
)
3081 (multiple-value-bind (name type
)
3082 (parse-name.type
(car (last (cdr path
))))
3083 (destructuring-bind (host device
)
3084 (split-sequence-if (lambda (x) (eql x
#\
+))
3085 (or (puri:uri-host uri
) "+"))
3086 (make-pathname :host
(string-or host
)
3087 :device
(string-or device
)
3088 :directory
(cons :absolute
(butlast (cdr path
)))
3091 #+cxml-system
::uri-is-namestring
3092 (defun uri-to-pathname (uri)
3093 (let ((pathname (puri:render-uri uri nil
)))
3094 (when (equalp (pathname-host pathname
) "+")
3095 (setf (slot-value pathname
'lisp
::host
) "localhost"))
3099 (input handler
&rest args
3100 &key validate dtd root entity-resolver disallow-internal-subset
3102 (declare (ignore validate dtd root entity-resolver disallow-internal-subset
3106 for
(name value
) on args by
#'cddr
3107 unless
(eq name
:pathname
)
3108 append
(list name value
))))
3110 (xstream (apply #'parse-xstream input handler args
))
3111 (pathname (apply #'parse-file input handler args
))
3112 (rod (apply #'parse-rod input handler args
))
3113 (array (apply #'parse-octets input handler args
))
3115 (let ((xstream (make-xstream input
:speed
8192)))
3116 (setf (xstream-name xstream
)
3118 :entity-name
"main document"
3121 (pathname-to-uri (merge-pathnames pathname
))
3122 (safe-stream-sysid input
))))
3123 (apply #'parse-xstream xstream handler args
))))))
3125 (defun parse-xstream (xstream handler
&rest args
)
3128 (with-zstream (zstream :input-stack
(list xstream
))
3130 (with-scratch-pads ()
3131 (apply #'p
/document zstream handler args
)))
3132 (runes-encoding:encoding-error
(c)
3133 (wf-error xstream
"~A" c
)))))
3135 (defun parse-file (filename handler
&rest args
)
3136 (with-open-xfile (input filename
)
3137 (setf (xstream-name input
)
3139 :entity-name
"main document"
3141 :uri
(pathname-to-uri (merge-pathnames filename
))))
3142 (apply #'parse-xstream input handler args
)))
3144 (defun resolve-synonym-stream (stream)
3145 (while (typep stream
'synonym-stream
)
3146 (setf stream
(symbol-value (synonym-stream-symbol stream
))))
3149 (defun safe-stream-sysid (stream)
3150 (if (and (typep (resolve-synonym-stream stream
) 'file-stream
)
3151 ;; ignore-errors, because sb-bsd-sockets creates instances of
3152 ;; FILE-STREAMs that aren't
3153 (ignore-errors (pathname stream
)))
3154 (pathname-to-uri (merge-pathnames (pathname stream
)))
3157 (defun parse-stream (stream handler
&rest args
)
3161 :name
(make-stream-name
3162 :entity-name
"main document"
3164 :uri
(safe-stream-sysid stream
))
3166 (apply #'parse-xstream xstream handler args
)))
3168 (defun parse-empty-document
3169 (uri qname handler
&key public-id system-id entity-resolver
(recode t
))
3170 (check-type uri
(or null rod
))
3171 (check-type qname
(or null rod
))
3172 (check-type public-id
(or null rod
))
3173 (check-type system-id
(or null puri
:uri
))
3174 (check-type entity-resolver
(or null function symbol
))
3175 (check-type recode boolean
)
3178 (setf handler
(make-recoder handler
#'rod-to-utf8-string
)))
3180 (make-context :handler handler
:entity-resolver entity-resolver
))
3183 (when (or public-id system-id
)
3184 (extid-using-catalog (make-extid public-id system-id
)))))
3185 (sax:start-document handler
)
3187 (sax:start-dtd handler
3190 (and system-id
(uri-rod system-id
)))
3191 (setf (dtd *ctx
*) (getdtd (extid-system extid
) *dtd-cache
*))
3193 (with-scratch-pads ()
3194 (let ((*data-behaviour
* :DTD
))
3195 (let ((xi2 (xstream-open-extid extid
)))
3196 (with-zstream (zi2 :input-stack
(list xi2
))
3198 (p/ext-subset zi2
))))))
3199 (sax:end-dtd handler
)
3200 (let ((dtd (dtd *ctx
*)))
3201 (sax:entity-resolver handler
(lambda (n h
) (resolve-entity n h dtd
)))
3202 (sax::dtd handler dtd
)))
3204 (when (or uri qname
)
3207 (list (sax:make-attribute
:qname
#"xmlns"
3210 (*namespace-bindings
* *namespace-bindings
*)
3212 (when sax
:*namespace-processing
*
3213 (setf new-namespaces
(declare-namespaces attrs
))
3214 (mapc #'set-attribute-namespace attrs
))
3215 (multiple-value-bind (uri prefix local-name
)
3216 (if sax
:*namespace-processing
* (decode-qname qname
) nil
)
3217 (declare (ignore prefix
))
3218 (unless (or sax
:*include-xmlns-attributes
*
3219 (null sax
:*namespace-processing
*))
3221 (sax:start-element
(handler *ctx
*) uri local-name qname attrs
)
3222 (sax:end-element
(handler *ctx
*) uri local-name qname
))
3223 (undeclare-namespaces new-namespaces
)))
3224 (sax:end-document handler
)))
3226 (defun parse-dtd-file (filename &optional handler
)
3227 (with-open-file (s filename
:element-type
'(unsigned-byte 8))
3228 (parse-dtd-stream s handler
)))
3230 (defun parse-dtd-stream (stream &optional handler
)
3231 (let ((input (make-xstream stream
)))
3232 (setf (xstream-name input
)
3236 :uri
(safe-stream-sysid stream
)))
3237 (let ((*ctx
* (make-context :handler handler
))
3239 (*data-behaviour
* :DTD
))
3240 (with-zstream (zstream :input-stack
(list input
))
3241 (with-scratch-pads ()
3244 (p/ext-subset zstream
)
3247 (defun parse-rod (string handler
&rest args
)
3248 (let ((xstream (string->xstream string
)))
3249 (setf (xstream-name xstream
)
3251 :entity-name
"main document"
3254 (apply #'parse-xstream xstream handler args
)))
3256 (defun string->xstream
(string)
3257 (make-rod-xstream (string-rod string
)))
3259 (defun parse-octets (octets handler
&rest args
)
3260 (apply #'parse-stream
(make-octet-input-stream octets
) handler args
))
3264 (defun zstream-push (new-xstream zstream
)
3265 (cond ((find-if (lambda (x)
3267 (eql (stream-name-entity-name (xstream-name x
))
3268 (stream-name-entity-name (xstream-name new-xstream
)))
3269 (eql (stream-name-entity-kind (xstream-name x
))
3270 (stream-name-entity-kind (xstream-name new-xstream
)))))
3271 (zstream-input-stack zstream
))
3272 (wf-error zstream
"Infinite recursion.")))
3273 (push new-xstream
(zstream-input-stack zstream
))
3276 (defun recurse-on-entity (zstream name kind continuation
&optional internalp
)
3277 (assert (not (zstream-token-category zstream
)))
3278 (call-with-entity-expansion-as-stream
3280 (lambda (new-xstream)
3281 (push :stop
(zstream-input-stack zstream
))
3282 (zstream-push new-xstream zstream
)
3284 (funcall continuation zstream
)
3285 (assert (eq (peek-token zstream
) :eof
))
3286 (assert (eq (pop (zstream-input-stack zstream
)) new-xstream
))
3287 (close-xstream new-xstream
)
3288 (assert (eq (pop (zstream-input-stack zstream
)) :stop
))
3289 (setf (zstream-token-category zstream
) nil
)
3290 '(consume-token zstream
)) )
3296 (defmacro read-data-until
* ((predicate input res res-start res-end
) &body body
)
3297 ;; fast variant -- for now disabled for no apparent reason
3298 ;; -> res, res-start, res-end
3299 `(let* ((rptr (xstream-read-ptr ,input
))
3301 (fptr (xstream-fill-ptr ,input
))
3302 (buf (xstream-buffer ,input
))
3303 ,res
,res-start
,res-end
)
3304 (declare (type fixnum rptr fptr p0
)
3305 (type (simple-array read-element
(*)) buf
))
3307 (cond ((%
= rptr fptr
)
3308 ;; underflow -- hmm inject the scratch-pad with what we
3309 ;; read and continue, while using read-rune and collecting
3310 ;; d.h. besser waere hier auch while-reading zu benutzen.
3311 (setf (xstream-read-ptr ,input
) rptr
)
3312 (multiple-value-setq (,res
,res-start
,res-end
)
3313 (with-rune-collector/raw
(collect)
3314 (do ((i p0
(%
+ i
1)))
3316 (collect (%rune buf i
)))
3319 (cond ((%
= rptr fptr
)
3320 (setf (xstream-read-ptr ,input
) rptr
)
3321 (setf c
(peek-rune input
))
3324 (setf rptr
(xstream-read-ptr ,input
)
3325 fptr
(xstream-fill-ptr ,input
)
3326 buf
(xstream-buffer ,input
)))
3328 (setf c
(%rune buf rptr
))))
3329 (cond ((,predicate c
)
3331 (setf (xstream-read-ptr ,input
) rptr
)
3336 (setf rptr
(%
+ rptr
1))) )))))
3338 ((,predicate
(%rune buf rptr
))
3340 (setf (xstream-read-ptr ,input
) rptr
)
3341 (setf ,res buf
,res-start p0
,res-end rptr
)
3345 (sf rptr
(%
+ rptr
1))) ))
3349 (defmacro read-data-until
* ((predicate input res res-start res-end
) &body body
)
3350 "Read data from `input' until `predicate' applied to the read char
3351 turns true. Then execute `body' with `res', `res-start', `res-end'
3352 bound to denote a subsequence (of RUNEs) containing the read portion.
3353 The rune upon which `predicate' turned true is neither consumed from
3354 the stream, nor included in `res'.
3356 Keep the predicate short, this it may be included more than once into
3357 the macro's expansion."
3359 (let ((input-var (gensym))
3362 `(LET ((,input-var
,input
))
3363 (MULTIPLE-VALUE-BIND (,res
,res-start
,res-end
)
3364 (WITH-RUNE-COLLECTOR/RAW
(,collect
)
3366 (LET ((,c
(PEEK-RUNE ,input-var
)))
3368 ;; xxx error message
3370 ((FUNCALL ,predicate
,c
)
3374 (CONSUME-RUNE ,input-var
))))))
3378 (defun read-name-token (input)
3379 (read-data-until* ((lambda (rune)
3380 (declare (type rune rune
))
3381 (not (name-rune-p rune
)))
3384 (intern-name r rs re
)))
3386 (defun read-cdata (input)
3387 (read-data-until* ((lambda (rune)
3388 (declare (type rune rune
))
3389 (when (and (%rune
< rune
#/U
+0020)
3390 (not (or (%rune
= rune
#/U
+0009)
3391 (%rune
= rune
#/U
+000a
)
3392 (%rune
= rune
#/U
+000d
))))
3393 (wf-error input
"code point invalid: ~A" rune
))
3394 (or (%rune
= rune
#/<) (%rune
= rune
#/&)))
3398 (declare (type (simple-array rune
(*)) source
)
3399 (type ufixnum start
)
3401 (optimize (speed 3) (safety 0)))
3402 (let ((res (make-array (%- end start
) :element-type
'rune
)))
3403 (declare (type (simple-array rune
(*)) res
))
3404 (let ((i (%- end start
)))
3405 (declare (type ufixnum i
))
3408 (setf (%rune res i
) (%rune source
(the ufixnum
(+ i start
))))
3413 ;; used only by read-att-value-2
3414 (defun internal-entity-expansion (name)
3415 (let ((def (get-entity-definition name
:general
(dtd *ctx
*))))
3417 (wf-error nil
"Entity '~A' is not defined." (rod-string name
)))
3418 (unless (typep def
'internal-entdef
)
3419 (wf-error nil
"Entity '~A' is not an internal entity." name
))
3420 (or (entdef-expansion def
)
3421 (setf (entdef-expansion def
) (find-internal-entity-expansion name
)))))
3423 ;; used only by read-att-value-2
3424 (defun find-internal-entity-expansion (name)
3425 (with-zstream (zinput)
3426 (with-rune-collector-3 (collect)
3427 (labels ((muffle (input)
3430 (setf c
(read-rune input
))
3434 (setf c
(peek-rune input
))
3438 (let ((c (read-character-reference input
)))
3439 (%put-unicode-char c collect
)))
3441 (unless (name-start-rune-p c
)
3442 (wf-error zinput
"Expecting name after &."))
3443 (let ((name (read-name-token input
)))
3444 (setf c
(read-rune input
))
3445 (check-rune input c
#/\
;)
3447 zinput name
:general
3449 (muffle (car (zstream-input-stack zinput
)))))))))
3451 (wf-error zinput
"unexpected #\/<"))
3454 ((not (data-rune-p c
))
3455 (wf-error zinput
"illegal char: ~S." c
))
3458 (declare (dynamic-extent #'muffle
))
3460 zinput name
:general
3462 (muffle (car (zstream-input-stack zinput
)))))))))
3465 (defun resolve-entity (name handler dtd
)
3466 (let ((*validate
* nil
))
3467 (if (get-entity-definition name
:general dtd
)
3468 (let* ((*ctx
* (make-context :handler handler
:dtd dtd
))
3469 (*data-behaviour
* :DOC
))
3470 (with-zstream (input)
3471 (with-scratch-pads ()
3476 (etypecase (checked-get-entdef name
:general
)
3477 (internal-entdef (p/content input
))
3478 (external-entdef (p/ext-parsed-ent input
)))
3479 (unless (eq (peek-token input
) :eof
)
3480 (wf-error input
"Trailing garbage. - ~S"
3481 (peek-token input
)))))))))
3484 (defun read-att-value-2 (input)
3485 (let ((delim (read-rune input
)))
3486 (when (eql delim
:eof
)
3488 (unless (member delim
'(#/\" #/\') :test
#'eql
)
3490 "Bad attribute value delimiter ~S, must be either #\\\" or #\\\'."
3492 (with-rune-collector-4 (collect)
3494 (let ((c (read-rune input
)))
3500 (wf-error input
"'<' not allowed in attribute values"))
3502 (multiple-value-bind (kind sem
) (read-entity-like input
)
3504 (:CHARACTER-REFERENCE
3505 (%put-unicode-char sem collect
))
3507 (let* ((exp (internal-entity-expansion sem
))
3509 (declare (type (simple-array rune
(*)) exp
))
3510 (do ((i 0 (%
+ i
1)))
3512 (collect (%rune exp i
))))))))
3522 ;; We already know that name is part of a valid XML name, so all we
3523 ;; have to check is that the first rune is a name-start-rune and that
3524 ;; there is not colon in it.
3525 (defun nc-name-p (name)
3526 (and (plusp (length name
))
3527 (name-start-rune-p (rune name
0))
3528 (notany #'(lambda (rune) (rune= #/: rune
)) name
)))
3530 (defun split-qname (qname)
3531 (declare (type runes
:simple-rod qname
))
3532 (let ((pos (position #/: qname
)))
3534 (let ((prefix (subseq qname
0 pos
))
3535 (local-name (subseq qname
(1+ pos
))))
3537 (wf-error nil
"empty namespace prefix"))
3538 (if (nc-name-p local-name
)
3539 (values prefix local-name
)
3540 (wf-error nil
"~S is not a valid NcName."
3541 (rod-string local-name
))))
3542 (values () qname
))))
3544 (defun decode-qname (qname)
3545 "decode-qname name => namespace-uri, prefix, local-name"
3546 (declare (type runes
:simple-rod qname
))
3547 (multiple-value-bind (prefix local-name
) (split-qname qname
)
3548 (let ((uri (find-namespace-binding prefix
)))
3550 (values uri prefix local-name
)
3551 (values nil nil qname
)))))
3554 (defun find-namespace-binding (prefix)
3555 (cdr (or (assoc (or prefix
#"") *namespace-bindings
* :test
#'rod
=)
3556 (wf-error nil
"Undeclared namespace prefix: ~A" (rod-string prefix
)))))
3558 ;; FIXME: Should probably be refactored by adding :start and :end to rod=/rod-equal
3559 (defun rod-starts-with (prefix rod
)
3560 (and (<= (length prefix
) (length rod
))
3561 (dotimes (i (length prefix
) t
)
3562 (unless (rune= (rune prefix i
) (rune rod i
))
3565 (defun xmlns-attr-p (attr-name)
3566 (rod-starts-with #.
(string-rod "xmlns") attr-name
))
3568 (defun attrname->prefix
(attrname)
3569 (if (< 5 (length attrname
))
3573 (defun find-namespace-declarations (attributes)
3575 for attribute in attributes
3576 for qname
= (sax:attribute-qname attribute
)
3577 when
(xmlns-attr-p qname
)
3578 collect
(cons (attrname->prefix qname
) (sax:attribute-value attribute
))))
3580 (defun declare-namespaces (attributes)
3581 (let ((ns-decls (find-namespace-declarations attributes
)))
3582 (dolist (ns-decl ns-decls
)
3583 ;; check some namespace validity constraints
3584 (let ((prefix (car ns-decl
))
3585 (uri (cdr ns-decl
)))
3587 ((and (rod= prefix
#"xml")
3588 (not (rod= uri
#"http://www.w3.org/XML/1998/namespace")))
3590 "Attempt to rebind the prefix \"xml\" to ~S." (mu uri
)))
3591 ((and (rod= uri
#"http://www.w3.org/XML/1998/namespace")
3592 (not (rod= prefix
#"xml")))
3595 URI \"http://www.w3.org/XML/1998/namespace\" may not ~
3596 be bound to the prefix ~S, only \"xml\" is legal."
3598 ((and (rod= prefix
#"xmlns")
3599 (rod= uri
#"http://www.w3.org/2000/xmlns/"))
3601 "Attempt to bind the prefix \"xmlns\" to its predefined ~
3602 URI \"http://www.w3.org/2000/xmlns/\", which is ~
3603 forbidden for no good reason."))
3604 ((rod= prefix
#"xmlns")
3606 "Attempt to bind the prefix \"xmlns\" to the URI ~S, ~
3607 but it may not be declared." (mu uri
)))
3608 ((rod= uri
#"http://www.w3.org/2000/xmlns/")
3610 "The namespace URI \"http://www.w3.org/2000/xmlns/\" may ~
3611 not be bound to prefix ~S (or any other)." (mu prefix
)))
3612 ((and (rod= uri
#"") prefix
)
3614 "Only the default namespace (the one without a prefix) ~
3615 may be bound to an empty namespace URI, thus ~
3618 (push (cons prefix
(if (rod= #"" uri
) nil uri
))
3619 *namespace-bindings
*)
3620 (sax:start-prefix-mapping
(handler *ctx
*)
3625 (defun undeclare-namespaces (ns-decls)
3626 (dolist (ns-decl ns-decls
)
3627 (sax:end-prefix-mapping
(handler *ctx
*) (car ns-decl
))))
3629 (defun build-attribute-list (attr-alist)
3630 ;; fixme: if there is a reason this function reverses attribute order,
3631 ;; it should be documented.
3633 (dolist (pair attr-alist
)
3634 (push (sax:make-attribute
:qname
(car pair
)
3640 (defun check-attribute-uniqueness (attributes)
3641 ;; 5.3 Uniqueness of Attributes
3642 ;; In XML documents conforming to [the xmlns] specification, no
3643 ;; tag may contain two attributes which:
3644 ;; 1. have identical names, or
3645 ;; 2. have qualified names with the same local part and with
3646 ;; prefixes which have been bound to namespace names that are
3649 ;; 1. is checked by read-tag-2, so we only deal with 2 here
3650 (loop for
(attr-1 . rest
) on attributes do
3651 (when (and (sax:attribute-namespace-uri attr-1
)
3652 (find-if (lambda (attr-2)
3653 (and (rod= (sax:attribute-namespace-uri attr-1
)
3654 (sax:attribute-namespace-uri attr-2
))
3655 (rod= (sax:attribute-local-name attr-1
)
3656 (sax:attribute-local-name attr-2
))))
3659 "Multiple definitions of attribute ~S in namespace ~S."
3660 (mu (sax:attribute-local-name attr-1
))
3661 (mu (sax:attribute-namespace-uri attr-1
))))))
3663 (defun set-attribute-namespace (attribute)
3664 (let ((qname (sax:attribute-qname attribute
)))
3665 (if (and sax
:*use-xmlns-namespace
* (rod= qname
#"xmlns"))
3666 (setf (sax:attribute-namespace-uri attribute
)
3667 #"http://www.w3.org/2000/xmlns/")
3668 (multiple-value-bind (prefix local-name
) (split-qname qname
)
3669 (when (and prefix
;; default namespace doesn't apply to attributes
3670 (or (not (rod= #"xmlns" prefix
))
3671 sax
:*use-xmlns-namespace
*))
3672 (setf (sax:attribute-namespace-uri attribute
)
3673 (decode-qname qname
)))
3674 (setf (sax:attribute-local-name attribute
) local-name
)))))
3678 ;; System Identifier Protocol
3680 ;; A system identifier is an object obeying to the system identifier
3681 ;; protocol. Often something like an URL or a pathname.
3683 ;; OPEN-SYS-ID sys-id [generic function]
3685 ;; Opens the resource associated with the system identifier `sys-id'
3686 ;; for reading and returns a stream. For now it is expected, that the
3687 ;; stream is an octet stream (one of element type (unsigned-byte 8)).
3689 ;; More precisely: The returned object only has to obey to the xstream
3690 ;; controller protocol. (That is it has to provide implementations for
3691 ;; READ-OCTETS and XSTREAM-CONTROLLER-CLOSE).
3693 ;; MERGE-SYS-ID sys-id base [generic function]
3695 ;; Merges two system identifiers. That is resolve `sys-id' relative to
3696 ;; `base' yielding an absolute system identifier suitable for
3701 ;;; SAX validation handler
3703 (defclass validator
()
3704 ((context :initarg
:context
:accessor context
)
3705 (cdatap :initform nil
:accessor cdatap
)))
3707 (defun make-validator (dtd root
)
3708 (make-instance 'validator
3709 :context
(make-context
3712 :model-stack
(list (make-root-model root
)))))
3714 (macrolet ((with-context ((validator) &body body
)
3715 `(let ((*ctx
* (context ,validator
))
3717 (with-scratch-pads () ;nicht schoen
3719 (defmethod sax:start-element
((handler validator
) uri lname qname attributes
)
3721 (with-context (handler)
3722 (validate-start-element *ctx
* qname
)
3723 (process-attributes *ctx
* qname attributes
)))
3725 (defmethod sax:start-cdata
((handler validator
))
3726 (setf (cdatap handler
) t
))
3728 (defmethod sax:characters
((handler validator
) data
)
3729 (with-context (handler)
3730 (validate-characters *ctx
* (if (cdatap handler
) #"hack" data
))))
3732 (defmethod sax:end-cdata
((handler validator
))
3733 (setf (cdatap handler
) nil
))
3735 (defmethod sax:end-element
((handler validator
) uri lname qname
)
3737 (with-context (handler)
3738 (validate-end-element *ctx
* qname
))))