Fixed condition class in not-wellformed document with the STP builder
[cxml.git] / xml / xml-parse.lisp
blobeb8e33884f9edbb889a5ee563525684f5bb535db
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: XML parser
4 ;;; Created: 1999-07-17
5 ;;; Author: Gilbert Baumann <unk6@rz.uni-karlsruhe.de>
6 ;;; Author: Henrik Motakef <hmot@henrik-motakef.de>
7 ;;; Author: David Lichteblau <david@lichteblau.com>
8 ;;; License: Lisp-LGPL (See file COPYING for details).
9 ;;; ---------------------------------------------------------------------------
10 ;;; (c) copyright 1999 by Gilbert Baumann
11 ;;; (c) copyright 2003 by Henrik Motakef
12 ;;; (c) copyright 2004 knowledgeTools Int. GmbH
13 ;;; (c) copyright 2004 David Lichteblau
14 ;;; (c) copyright 2005 David Lichteblau
16 ;;; This library is free software; you can redistribute it and/or
17 ;;; modify it under the terms of the GNU Library General Public
18 ;;; License as published by the Free Software Foundation; either
19 ;;; version 2 of the License, or (at your option) any later version.
20 ;;;
21 ;;; This library is distributed in the hope that it will be useful,
22 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 ;;; Library General Public License for more details.
25 ;;;
26 ;;; You should have received a copy of the GNU Library General Public
27 ;;; License along with this library; if not, write to the
28 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
29 ;;; Boston, MA 02111-1307 USA.
31 ;;; Streams
33 ;;; xstreams
35 ;; For reading runes, I defined my own streams, called xstreams,
36 ;; because we want to be fast. A function call or even a method call
37 ;; per character is not acceptable, instead of that we define a
38 ;; buffered stream with and advertised buffer layout, so that we
39 ;; could use the trick stdio uses: READ-RUNE and PEEK-RUNE are macros,
40 ;; directly accessing the buffer and only calling some underflow
41 ;; handler in case of stream underflows. This will yield to quite a
42 ;; performance boost vs calling READ-BYTE per character.
44 ;; Also we need to do encoding t conversion on ; this better done at large chunks of data rather than on a character
45 ;; by character basis. This way we need a dispatch on the active
46 ;; encoding only once in a while, instead of for each character. This
47 ;; allows us to use a CLOS interface to do the underflow handling.
49 ;;; zstreams
51 ;; Now, for reading tokens, we define another kind of streams, called
52 ;; zstreams. These zstreams also maintain an input stack to implement
53 ;; inclusion of external entities. This input stack contains xstreams
54 ;; or the special marker :STOP. Such a :STOP marker indicates, that
55 ;; input should not continue there, but well stop; that is simulate an
56 ;; EOF. The user is then responsible to pop this marker off the input
57 ;; stack.
59 ;; This input stack is also used to detect circular entity inclusion.
61 ;; The zstream tokenizer recognizes the following types of tokens and
62 ;; is controlled by the *DATA-BEHAVIOUR* flag. (Which should become a
63 ;; slot of zstreams instead).
65 ;; Common
66 ;; :xml-decl (<target> . <content>) ;processing-instruction starting with "<?xml"
67 ;; :pi (<target> . <content>) ;processing-instruction
68 ;; :stag (<name> . <atts>) ;start tag
69 ;; :etag (<name> . <atts>) ;end tag
70 ;; :ztag (<name> . <atts>) ;empty tag
71 ;; :<!ELEMENT
72 ;; :<!ENTITY
73 ;; :<!ATTLIST
74 ;; :<!NOTATION
75 ;; :<!DOCTYPE
76 ;; :<![
77 ;; :comment <content>
79 ;; *data-behaviour* = :DTD
81 ;; :nmtoken <interned-rod>
82 ;; :#required
83 ;; :#implied
84 ;; :#fixed
85 ;; :#pcdata
86 ;; :s
87 ;; :\[ :\] :\( :\) :|\ :\> :\" :\' :\, :\? :\* :\+
89 ;; *data-behaviour* = :DOC
91 ;; :entity-ref <interned-rod>
92 ;; :cdata <rod>
95 ;;; TODO
97 ;; o provide for a faster DOM
99 ;; o morph zstream into a context object and thus also get rid of
100 ;; special variables. Put the current DTD there too.
101 ;; [partly done]
103 ;; o the *scratch-pad* hack should become something much more
104 ;; reentrant, we could either define a system-wide resource
105 ;; or allocate some scratch-pads per context.
106 ;; [for thread-safety reasons the array are allocated per context now,
107 ;; reentrancy is still open]
109 ;; o CR handling in utf-16 deocders
111 ;; o UCS-4 reader
113 ;; o max depth together with circle detection
114 ;; (or proof, that our circle detection is enough).
115 ;; [gemeint ist zstream-push--david]
117 ;; o better extensibility wrt character representation, one may want to
118 ;; have
119 ;; - UCS-4 in vectoren
121 ;; o xstreams auslagern, documententieren und dann auch in SGML und
122 ;; CSS parser verwenden. (halt alles was zeichen liest).
123 ;; [ausgelagert sind sie; dokumentiert "so la la"; die Reintegration
124 ;; in Closure ist ein ganz anderes Thema]
126 ;; o recording of source locations for nodes.
128 ;; o based on the DTD and xml:space attribute implement HTML white
129 ;; space rules.
131 ;; o on a parser option, do not expand external entities.
133 ;;;; Validity constraints:
134 ;;;; (00) Root Element Type like (03), c.f. MAKE-ROOT-MODEL
135 ;;;; (01) Proper Declaration/PE Nesting P/MARKUP-DECL
136 ;;;; (02) Standalone Document Declaration all over the place [*]
137 ;;;; (03) Element Valid VALIDATE-*-ELEMENT, -CHARACTERS
138 ;;;; (04) Attribute Value Type VALIDATE-ATTRIBUTE
139 ;;;; (05) Unique Element Type Declaration DEFINE-ELEMENT
140 ;;;; (06) Proper Group/PE Nesting P/CSPEC
141 ;;;; (07) No Duplicate Types LEGAL-CONTENT-MODEL-P
142 ;;;; (08) ID VALIDATE-ATTRIBUTE
143 ;;;; (09) One ID per Element Type DEFINE-ATTRIBUTE
144 ;;;; (10) ID Attribute Default DEFINE-ATTRIBUTE
145 ;;;; (11) IDREF VALIDATE-ATTRIBUTE, P/DOCUMENT
146 ;;;; (12) Entity Name VALIDATE-ATTRIBUTE
147 ;;;; (13) Name Token VALIDATE-ATTRIBUTE
148 ;;;; (14) Notation Attributes VALIDATE-ATTRIBUTE, P/ATT-TYPE
149 ;;;; (15) One Notation Per Element Type DEFINE-ATTRIBUTE
150 ;;;; (16) No Notation on Empty Element DEFINE-ELEMENT, -ATTRIBUTE
151 ;;;; (17) Enumeration VALIDATE-ATTRIBUTE
152 ;;;; (18) Required Attribute PROCESS-ATTRIBUTES
153 ;;;; (19) Attribute Default Legal DEFINE-ATTRIBUTE
154 ;;;; (20) Fixed Attribute Default VALIDATE-ATTRIBUTE
155 ;;;; (21) Proper Conditional Section/PE Nesting P/CONDITIONAL-SECT, ...
156 ;;;; (22) Entity Declared [**]
157 ;;;; (23) Notation Declared P/ENTITY-DEF, P/DOCUMENT
158 ;;;; (24) Unique Notation Name DEFINE-NOTATION
159 ;;;;
160 ;;;; [*] Perhaps we could revert the explicit checks of (02), if we did
161 ;;;; _not_ read external subsets of standalone documents when parsing in
162 ;;;; validating mode. Violations of VC (02) constraints would then appear as
163 ;;;; wellformedness violations, right?
164 ;;;;
165 ;;;; [**] Although I haven't investigated this properly yet, I believe that
166 ;;;; we check this VC together with the WFC even in non-validating mode.
168 (in-package :cxml)
170 #+allegro
171 (setf (excl:named-readtable :runes) *readtable*)
173 (eval-when (:compile-toplevel :load-toplevel :execute)
174 (defparameter *fast* '(optimize (speed 3) (safety 0)))
175 ;;(defparameter *fast* '(optimize (speed 2) (safety 3)))
178 ;;; parser context
180 (defvar *ctx* nil)
182 (defstruct (context (:conc-name nil))
183 handler
184 (dtd nil)
185 model-stack
186 ;; xml:base machen wir fuer klacks mal gleich als expliziten stack:
187 base-stack
188 (referenced-notations '())
189 (id-table (%make-rod-hash-table))
190 ;; FIXME: Wofuer ist name-hashtable da? Will man das wissen?
191 (name-hashtable (make-rod-hashtable :size 2000))
192 (standalone-p nil)
193 (entity-resolver nil)
194 (disallow-internal-subset nil)
195 main-zstream)
197 (defvar *expand-pe-p* nil)
199 (defparameter *initial-namespace-bindings*
200 '((#"" . nil)
201 (#"xmlns" . #"http://www.w3.org/2000/xmlns/")
202 (#"xml" . #"http://www.w3.org/XML/1998/namespace")))
204 (defparameter *namespace-bindings* *initial-namespace-bindings*)
206 ;;;; ---------------------------------------------------------------------------
207 ;;;; xstreams
208 ;;;;
211 (defstruct (stream-name
212 (:print-function print-stream-name))
213 entity-name
214 entity-kind
215 uri)
217 (defun print-stream-name (object stream depth)
218 (declare (ignore depth))
219 (format stream "[~A ~S ~A]"
220 (rod-string (stream-name-entity-name object))
221 (stream-name-entity-kind object)
222 (stream-name-uri object)))
224 (deftype read-element () 'rune)
226 (defun call-with-open-xstream (fn stream)
227 (unwind-protect
228 (funcall fn stream)
229 (close-xstream stream)))
231 (defmacro with-open-xstream ((var value) &body body)
232 `(call-with-open-xstream (lambda (,var) ,@body) ,value))
234 (defun call-with-open-xfile (continuation &rest open-args)
235 (let ((input (apply #'open (car open-args) :element-type '(unsigned-byte 8) (cdr open-args))))
236 (unwind-protect
237 (progn
238 (funcall continuation (make-xstream input)))
239 (close input))))
241 (defmacro with-open-xfile ((stream &rest open-args) &body body)
242 `(call-with-open-xfile (lambda (,stream) .,body) .,open-args))
244 ;;;; -------------------------------------------------------------------
245 ;;;; Rechnen mit Runen
246 ;;;;
248 ;; Let us first define fast fixnum arithmetric get rid of type
249 ;; checks. (After all we know what we do here).
251 (defmacro fx-op (op &rest xs)
252 `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
253 (defmacro fx-pred (op &rest xs)
254 `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
256 (defmacro %+ (&rest xs) `(fx-op + ,@xs))
257 (defmacro %- (&rest xs) `(fx-op - ,@xs))
258 (defmacro %* (&rest xs) `(fx-op * ,@xs))
259 (defmacro %/ (&rest xs) `(fx-op floor ,@xs))
260 (defmacro %and (&rest xs) `(fx-op logand ,@xs))
261 (defmacro %ior (&rest xs) `(fx-op logior ,@xs))
262 (defmacro %xor (&rest xs) `(fx-op logxor ,@xs))
263 (defmacro %ash (&rest xs) `(fx-op ash ,@xs))
264 (defmacro %mod (&rest xs) `(fx-op mod ,@xs))
266 (defmacro %= (&rest xs) `(fx-pred = ,@xs))
267 (defmacro %<= (&rest xs) `(fx-pred <= ,@xs))
268 (defmacro %>= (&rest xs) `(fx-pred >= ,@xs))
269 (defmacro %< (&rest xs) `(fx-pred < ,@xs))
270 (defmacro %> (&rest xs) `(fx-pred > ,@xs))
272 ;;; XXX Geschwindigkeit dieser Definitionen untersuchen!
274 (defmacro rune-op (op &rest xs)
275 `(code-rune (,op ,@(mapcar (lambda (x) `(rune-code ,x)) xs))))
276 (defmacro rune-pred (op &rest xs)
277 `(,op ,@(mapcar (lambda (x) `(rune-code ,x)) xs)))
279 (defmacro %rune+ (&rest xs) `(rune-op + ,@xs))
280 (defmacro %rune- (&rest xs) `(rune-op - ,@xs))
281 (defmacro %rune* (&rest xs) `(rune-op * ,@xs))
282 (defmacro %rune/ (&rest xs) `(rune-op floor ,@xs))
283 (defmacro %rune-and (&rest xs) `(rune-op logand ,@xs))
284 (defmacro %rune-ior (&rest xs) `(rune-op logior ,@xs))
285 (defmacro %rune-xor (&rest xs) `(rune-op logxor ,@xs))
286 (defmacro %rune-ash (a b) `(code-rune (ash (rune-code ,a) ,b)))
287 (defmacro %rune-mod (&rest xs) `(rune-op mod ,@xs))
289 (defmacro %rune= (&rest xs) `(rune-pred = ,@xs))
290 (defmacro %rune<= (&rest xs) `(rune-pred <= ,@xs))
291 (defmacro %rune>= (&rest xs) `(rune-pred >= ,@xs))
292 (defmacro %rune< (&rest xs) `(rune-pred < ,@xs))
293 (defmacro %rune> (&rest xs) `(rune-pred > ,@xs))
295 ;;;; ---------------------------------------------------------------------------
296 ;;;; rod hashtable
297 ;;;;
299 ;;; make-rod-hashtable
300 ;;; rod-hash-get hashtable rod &optional start end -> value ; successp
301 ;;; (setf (rod-hash-get hashtable rod &optional start end) new-value
304 (defstruct (rod-hashtable (:constructor make-rod-hashtable/low))
305 size ;size of table
306 table ;
309 (defun make-rod-hashtable (&key (size 200))
310 (setf size (nearest-greater-prime size))
311 (make-rod-hashtable/low
312 :size size
313 :table (make-array size :initial-element nil)))
315 (eval-when (:compile-toplevel :load-toplevel :execute)
316 (defconstant +fixnum-bits+
317 (1- (integer-length most-positive-fixnum))
318 "Pessimistic approximation of the number of bits of fixnums.")
320 (defconstant +fixnum-mask+
321 (1- (expt 2 +fixnum-bits+))
322 "Pessimistic approximation of the largest bit-mask, still being a fixnum."))
324 (definline stir (a b)
325 (%and +fixnum-mask+
326 (%xor (%ior (%ash (%and a #.(ash +fixnum-mask+ -5)) 5)
327 (%ash a #.(- 5 +fixnum-bits+)))
328 b)))
330 (definline rod-hash (rod start end)
331 "Compute a hash code out of a rod."
332 (let ((res (%- end start)))
333 (do ((i start (%+ i 1)))
334 ((%= i end))
335 (declare (type fixnum i))
336 (setf res (stir res (rune-code (%rune rod i)))))
337 res))
339 (definline rod=* (x y &key (start1 0) (end1 (length x))
340 (start2 0) (end2 (length y)))
341 (and (%= (%- end1 start1) (%- end2 start2))
342 (do ((i start1 (%+ i 1))
343 (j start2 (%+ j 1)))
344 ((%= i end1)
346 (unless (rune= (%rune x i) (%rune y j))
347 (return nil)))))
349 (definline rod=** (x y start1 end1 start2 end2)
350 (and (%= (%- end1 start1) (%- end2 start2))
351 (do ((i start1 (%+ i 1))
352 (j start2 (%+ j 1)))
353 ((%= i end1)
355 (unless (rune= (%rune x i) (%rune y j))
356 (return nil)))))
358 (defun rod-hash-get (hashtable rod &optional (start 0) (end (length rod)))
359 (declare (type (simple-array rune (*)) rod))
360 (let ((j (%mod (rod-hash rod start end)
361 (rod-hashtable-size hashtable))))
362 (dolist (q (svref (rod-hashtable-table hashtable) j)
363 (values nil nil nil))
364 (declare (type cons q))
365 (when (rod=** (car q) rod 0 (length (the (simple-array rune (*)) (car q))) start end)
366 (return (values (cdr q) t (car q)))))))
368 (defun rod-hash-set (new-value hashtable rod &optional (start 0) (end (length rod)))
369 (let ((j (%mod (rod-hash rod start end)
370 (rod-hashtable-size hashtable)))
371 (key nil))
372 (dolist (q (svref (rod-hashtable-table hashtable) j)
373 (progn
374 (setf key (rod-subseq* rod start end))
375 (push (cons key new-value)
376 (aref (rod-hashtable-table hashtable) j))))
377 (when (rod=* (car q) rod :start2 start :end2 end)
378 (setf key (car q))
379 (setf (cdr q) new-value)
380 (return)))
381 (values new-value key)))
383 #-rune-is-character
384 (defun rod-subseq* (source start &optional (end (length source)))
385 (unless (and (typep start 'fixnum) (>= start 0))
386 (error "~S is not a non-negative fixnum." start))
387 (unless (and (typep end 'fixnum) (>= end start))
388 (error "END argument, ~S, is not a fixnum no less than START, ~S." end start))
389 (when (> start (length source))
390 (error "START argument, ~S, should be no greater than length of rod." start))
391 (when (> end (length source))
392 (error "END argument, ~S, should be no greater than length of rod." end))
393 (locally
394 (declare (type fixnum start end))
395 (let ((res (make-rod (- end start))))
396 (declare (type rod res))
397 (do ((i (- (- end start) 1) (the fixnum (- i 1))))
398 ((< i 0) res)
399 (declare (type fixnum i))
400 (setf (%rune res i) (aref source (the fixnum (+ i start))))))))
402 #+rune-is-character
403 (defun rod-subseq* (source start &optional (end (length source)))
404 (subseq source start end))
406 (deftype ufixnum () `(unsigned-byte ,(integer-length most-positive-fixnum)))
408 #-rune-is-character
409 (defun rod-subseq** (source start &optional (end (length source)))
410 (declare (type (simple-array rune (*)) source)
411 (type ufixnum start)
412 (type ufixnum end)
413 (optimize (speed 3) (safety 0)))
414 (let ((res (make-array (%- end start) :element-type 'rune)))
415 (declare (type (simple-array rune (*)) res))
416 (let ((i (%- end start)))
417 (declare (type ufixnum i))
418 (loop
419 (setf i (- i 1))
420 (when (= i 0)
421 (return))
422 (setf (%rune res i) (%rune source (the ufixnum (+ i start))))))
423 res))
425 #+rune-is-character
426 (defun rod-subseq** (source start &optional (end (length source)))
427 (subseq source start end))
429 (defun (setf rod-hash-get) (new-value hashtable rod &optional (start 0) (end (length rod)))
430 (rod-hash-set new-value hashtable rod start end))
432 (defun intern-name (rod &optional (start 0) (end (length rod)))
433 (multiple-value-bind (value successp key) (rod-hash-get (name-hashtable *ctx*) rod start end)
434 (declare (ignore value))
435 (if successp
437 (nth-value 1 (rod-hash-set t (name-hashtable *ctx*) rod start end)))))
439 ;;;; ---------------------------------------------------------------------------
440 ;;;;
441 ;;;; rod collector
442 ;;;;
444 (defvar *scratch-pad*)
445 (defvar *scratch-pad-2*)
446 (defvar *scratch-pad-3*)
447 (defvar *scratch-pad-4*)
449 (declaim (type (simple-array rune (*))
450 *scratch-pad* *scratch-pad-2* *scratch-pad-3* *scratch-pad-4*))
452 (defmacro with-scratch-pads ((&optional) &body body)
453 `(let ((*scratch-pad* (make-array 1024 :element-type 'rune))
454 (*scratch-pad-2* (make-array 1024 :element-type 'rune))
455 (*scratch-pad-3* (make-array 1024 :element-type 'rune))
456 (*scratch-pad-4* (make-array 1024 :element-type 'rune)))
457 ,@body))
459 (defmacro %put-unicode-char (code-var put)
460 `(progn
461 (cond #+rune-is-utf-16
462 ((%> ,code-var #xFFFF)
463 (,put (the rune (code-rune (%+ #xD7C0 (%ash ,code-var -10)))))
464 (,put (the rune (code-rune (%ior #xDC00 (%and ,code-var #x03FF))))))
466 (,put (code-rune ,code-var))))))
468 (defun adjust-array-by-copying (old-array new-size)
469 "Adjust an array by copying and thus ensures, that result is a SIMPLE-ARRAY."
470 (let ((res (make-array new-size :element-type (array-element-type old-array))))
471 (replace res old-array
472 :start1 0 :end1 (length old-array)
473 :start2 0 :end2 (length old-array))
474 res))
476 (defmacro with-rune-collector-aux (scratch collect body mode)
477 (let ((rod (gensym))
478 (n (gensym))
479 (i (gensym))
480 (b (gensym)))
481 `(let ((,n (length ,scratch))
482 (,i 0)
483 (,b ,scratch))
484 (declare (type fixnum ,n ,i))
485 (macrolet
486 ((,collect (x)
487 `((lambda (x)
488 (locally
489 (declare #.*fast*)
490 (when (%>= ,',i ,',n)
491 (setf ,',n (* 2 ,',n))
492 (setf ,',b
493 (setf ,',scratch
494 (adjust-array-by-copying ,',scratch ,',n))))
495 (setf (aref (the (simple-array rune (*)) ,',b) ,',i) x)
496 (incf ,',i)))
497 ,x)))
498 ,@body
499 ,(ecase mode
500 (:intern
501 `(intern-name ,b 0 ,i))
502 (:copy
503 `(let ((,rod (make-rod ,i)))
504 (while (not (%= ,i 0))
505 (setf ,i (%- ,i 1))
506 (setf (%rune ,rod ,i)
507 (aref (the (simple-array rune (*)) ,b) ,i)))
508 ,rod))
509 (:raw
510 `(values ,b 0 ,i))
511 )))))
513 '(defmacro with-rune-collector-aux (scratch collect body mode)
514 (let ((rod (gensym))
515 (n (gensym))
516 (i (gensym))
517 (b (gensym)))
518 `(let ((,n (length ,scratch))
519 (,i 0))
520 (declare (type fixnum ,n ,i))
521 (macrolet
522 ((,collect (x)
523 `((lambda (x)
524 (locally
525 (declare #.*fast*)
526 (when (%>= ,',i ,',n)
527 (setf ,',n (* 2 ,',n))
528 (setf ,',scratch
529 (setf ,',scratch
530 (adjust-array-by-copying ,',scratch ,',n))))
531 (setf (aref (the (simple-array rune (*)) ,',scratch) ,',i) x)
532 (incf ,',i)))
533 ,x)))
534 ,@body
535 ,(ecase mode
536 (:intern
537 `(intern-name ,scratch 0 ,i))
538 (:copy
539 `(let ((,rod (make-rod ,i)))
540 (while (%> ,i 0)
541 (setf ,i (%- ,i 1))
542 (setf (%rune ,rod ,i)
543 (aref (the (simple-array rune (*)) ,scratch) ,i)))
544 ,rod))
545 (:raw
546 `(values ,scratch 0 ,i))
547 )))))
549 (defmacro with-rune-collector ((collect) &body body)
550 `(with-rune-collector-aux *scratch-pad* ,collect ,body :copy))
552 (defmacro with-rune-collector-2 ((collect) &body body)
553 `(with-rune-collector-aux *scratch-pad-2* ,collect ,body :copy))
555 (defmacro with-rune-collector-3 ((collect) &body body)
556 `(with-rune-collector-aux *scratch-pad-3* ,collect ,body :copy))
558 (defmacro with-rune-collector-4 ((collect) &body body)
559 `(with-rune-collector-aux *scratch-pad-4* ,collect ,body :copy))
561 (defmacro with-rune-collector/intern ((collect) &body body)
562 `(with-rune-collector-aux *scratch-pad* ,collect ,body :intern))
564 (defmacro with-rune-collector/raw ((collect) &body body)
565 `(with-rune-collector-aux *scratch-pad* ,collect ,body :raw))
568 (defmacro while-reading-runes ((reader stream-in) &rest body)
569 ;; Thou shalt not leave body via a non local exit
570 (let ((stream (make-symbol "STREAM"))
571 (rptr (make-symbol "RPTR"))
572 (fptr (make-symbol "FPTR"))
573 (buf (make-symbol "BUF")) )
574 `(let* ((,stream ,stream-in)
575 (,rptr (xstream-read-ptr ,stream))
576 (,fptr (xstream-fill-ptr ,stream))
577 (,buf (xstream-buffer ,stream)))
578 (declare (type fixnum ,rptr ,fptr)
579 (type xstream ,stream))
580 (macrolet ((,reader (res-var)
581 `(cond ((%= ,',rptr ,',fptr)
582 (setf (xstream-read-ptr ,',stream) ,',rptr)
583 (setf ,res-var (xstream-underflow ,',stream))
584 (setf ,',rptr (xstream-read-ptr ,',stream))
585 (setf ,',fptr (xstream-fill-ptr ,',stream))
586 (setf ,',buf (xstream-buffer ,',stream)))
588 (setf ,res-var
589 (aref (the (simple-array read-element (*)) ,',buf)
590 (the fixnum ,',rptr)))
591 (setf ,',rptr (%+ ,',rptr 1))))))
592 (prog1
593 (let () .,body)
594 (setf (xstream-read-ptr ,stream) ,rptr) )))))
597 ;;;; ---------------------------------------------------------------------------
598 ;;;; DTD
599 ;;;;
601 (define-condition xml-parse-error (simple-error) ())
602 (define-condition well-formedness-violation (xml-parse-error) ())
603 (define-condition validity-error (xml-parse-error) ())
605 ;; We make some effort to signal end of file as a special condition, but we
606 ;; don't actually try very hard. Not sure whether we should. Right now I
607 ;; would prefer not to document this class.
608 (define-condition end-of-xstream (well-formedness-violation) ())
610 (defun describe-xstream (x s)
611 (format s " Line ~D, column ~D in ~A~%"
612 (xstream-line-number x)
613 (xstream-column-number x)
614 (let ((name (xstream-name x)))
615 (cond
616 ((null name)
617 "<anonymous stream>")
618 ((eq :main (stream-name-entity-kind name))
619 (stream-name-uri name))
621 name)))))
623 (defun %error (class stream message)
624 (let* ((zmain (if *ctx* (main-zstream *ctx*) nil))
625 (zstream (if (zstream-p stream) stream zmain))
626 (xstream (if (xstream-p stream) stream nil))
627 (s (make-string-output-stream)))
628 (write-line message s)
629 (when xstream
630 (write-line "Location:" s)
631 (describe-xstream xstream s))
632 (when zstream
633 (let ((stack
634 (remove xstream (remove :stop (zstream-input-stack zstream)))))
635 (when stack
636 (write-line "Context:" s)
637 (dolist (x stack)
638 (describe-xstream x s)))))
639 (when (and zmain (not (eq zstream zmain)))
640 (let ((stack
641 (remove xstream (remove :stop (zstream-input-stack zmain)))))
642 (when stack
643 (write-line "Context in main document:" s)
644 (dolist (x stack)
645 (describe-xstream x s)))))
646 (error class
647 :format-control "~A"
648 :format-arguments (list (get-output-stream-string s)))))
650 (defun validity-error (fmt &rest args)
651 (%error 'validity-error
653 (format nil "Document not valid: ~?" fmt args)))
655 (defun wf-error (stream fmt &rest args)
656 (%error 'well-formedness-violation
657 stream
658 (format nil "Document not well-formed: ~?" fmt args)))
660 (defun eox (stream &optional x &rest args)
661 (%error 'end-of-xstream
662 stream
663 (format nil "End of file~@[: ~?~]" x args)))
665 (defclass cxml-parser (sax:sax-parser) ((ctx :initarg :ctx)))
667 (defun parser-xstream (parser)
668 (car (zstream-input-stack (main-zstream (slot-value parser 'ctx)))))
670 (defun parser-stream-name (parser)
671 (let ((xstream (parser-xstream parser)))
672 (if xstream
673 (xstream-name xstream)
674 nil)))
676 (defmethod sax:line-number ((parser cxml-parser))
677 (let ((x (parser-xstream parser)))
678 (if x
679 (xstream-line-number x)
680 nil)))
682 (defmethod sax:column-number ((parser cxml-parser))
683 (let ((x (parser-xstream parser)))
684 (if x
685 (xstream-column-number x)
686 nil)))
688 (defmethod sax:system-id ((parser cxml-parser))
689 (let ((name (parser-stream-name parser)))
690 (if name
691 (stream-name-uri name)
692 nil)))
694 (defmethod sax:xml-base ((parser cxml-parser))
695 (let ((uri (car (base-stack (slot-value parser 'ctx)))))
696 (if (or (null uri) (stringp uri))
698 (puri:render-uri uri nil))))
700 (defvar *validate* t)
701 (defvar *external-subset-p* nil)
703 (defun validate-start-element (ctx name)
704 (when *validate*
705 (let* ((pair (car (model-stack ctx)))
706 (newval (funcall (car pair) name)))
707 (unless newval
708 (validity-error "(03) Element Valid: ~A" (rod-string name)))
709 (setf (car pair) newval)
710 (let ((e (find-element name (dtd ctx))))
711 (unless e
712 (validity-error "(03) Element Valid: no definition for ~A"
713 (rod-string name)))
714 (maybe-compile-cspec e)
715 (push (copy-cons (elmdef-compiled-cspec e)) (model-stack ctx))))))
717 (defun copy-cons (x)
718 (cons (car x) (cdr x)))
720 (defun validate-end-element (ctx name)
721 (when *validate*
722 (let ((pair (car (model-stack ctx))))
723 (unless (eq (funcall (car pair) nil) t)
724 (validity-error "(03) Element Valid: ~A" (rod-string name)))
725 (pop (model-stack ctx)))))
727 (defun validate-characters (ctx rod)
728 (when *validate*
729 (let ((pair (car (model-stack ctx))))
730 (unless (funcall (cdr pair) rod)
731 (validity-error "(03) Element Valid: unexpected PCDATA")))))
733 (defun standalone-check-necessary-p (def)
734 (and *validate*
735 (standalone-p *ctx*)
736 (etypecase def
737 (elmdef (elmdef-external-p def))
738 (attdef (attdef-external-p def)))))
740 ;; attribute validation, defaulting, and normalization -- except for for
741 ;; uniqueness checks, which are done after namespaces have been declared
742 (defun process-attributes (ctx name attlist)
743 (let ((e (find-element name (dtd ctx))))
744 (cond
746 (dolist (ad (elmdef-attributes e)) ;handle default values
747 (unless (get-attribute (attdef-name ad) attlist)
748 (case (attdef-default ad)
749 (:IMPLIED)
750 (:REQUIRED
751 (when *validate*
752 (validity-error "(18) Required Attribute: ~S not specified"
753 (rod-string (attdef-name ad)))))
755 (when (standalone-check-necessary-p ad)
756 (validity-error "(02) Standalone Document Declaration: missing attribute value"))
757 (push (sax:make-attribute :qname (attdef-name ad)
758 :value (cadr (attdef-default ad))
759 :specified-p nil)
760 attlist)))))
761 (dolist (a attlist) ;normalize non-CDATA values
762 (let* ((qname (sax:attribute-qname a))
763 (adef (find-attribute e qname)))
764 (when adef
765 (when (and *validate*
766 sax:*namespace-processing*
767 (eq (attdef-type adef) :ID)
768 (find #/: (sax:attribute-value a)))
769 (validity-error "colon in ID attribute"))
770 (unless (eq (attdef-type adef) :CDATA)
771 (let ((canon (canon-not-cdata-attval (sax:attribute-value a))))
772 (when (and (standalone-check-necessary-p adef)
773 (not (rod= (sax:attribute-value a) canon)))
774 (validity-error "(02) Standalone Document Declaration: attribute value not normalized"))
775 (setf (sax:attribute-value a) canon))))))
776 (when *validate* ;maybe validate attribute values
777 (dolist (a attlist)
778 (validate-attribute ctx e a))))
779 ((and *validate* attlist)
780 (validity-error "(04) Attribute Value Type: no definition for element ~A"
781 (rod-string name)))))
782 attlist)
784 (defun get-attribute (name attributes)
785 (member name attributes :key #'sax:attribute-qname :test #'rod=))
787 (defun validate-attribute (ctx e a)
788 (when (sax:attribute-specified-p a) ;defaults checked by DEFINE-ATTRIBUTE
789 (let* ((qname (sax:attribute-qname a))
790 (adef
791 (or (find-attribute e qname)
792 (validity-error "(04) Attribute Value Type: not declared: ~A"
793 (rod-string qname)))))
794 (validate-attribute* ctx adef (sax:attribute-value a)))))
796 (defun validate-attribute* (ctx adef value)
797 (let ((type (attdef-type adef))
798 (default (attdef-default adef)))
799 (when (and (listp default)
800 (eq (car default) :FIXED)
801 (not (rod= value (cadr default))))
802 (validity-error "(20) Fixed Attribute Default: expected ~S but got ~S"
803 (rod-string (cadr default))
804 (rod-string value)))
805 (ecase (if (listp type) (car type) type)
806 (:ID
807 (unless (valid-name-p value)
808 (validity-error "(08) ID: not a name: ~S" (rod-string value)))
809 (when (eq (gethash value (id-table ctx)) t)
810 (validity-error "(08) ID: ~S not unique" (rod-string value)))
811 (setf (gethash value (id-table ctx)) t))
812 (:IDREF
813 (validate-idref ctx value))
814 (:IDREFS
815 (let ((names (split-names value)))
816 (unless names
817 (validity-error "(11) IDREF: malformed names"))
818 (mapc (curry #'validate-idref ctx) names)))
819 (:NMTOKEN
820 (validate-nmtoken value))
821 (:NMTOKENS
822 (let ((tokens (split-names value)))
823 (unless tokens
824 (validity-error "(13) Name Token: malformed NMTOKENS"))
825 (mapc #'validate-nmtoken tokens)))
826 (:ENUMERATION
827 (unless (member value (cdr type) :test #'rod=)
828 (validity-error "(17) Enumeration: value not declared: ~S"
829 (rod-string value))))
830 (:NOTATION
831 (unless (member value (cdr type) :test #'rod=)
832 (validity-error "(14) Notation Attributes: ~S" (rod-string value))))
833 (:ENTITY
834 (validate-entity value))
835 (:ENTITIES
836 (let ((names (split-names value)))
837 (unless names
838 (validity-error "(13) Name Token: malformed NMTOKENS"))
839 (mapc #'validate-entity names)))
840 (:CDATA))))
842 (defun validate-idref (ctx value)
843 (unless (valid-name-p value)
844 (validity-error "(11) IDREF: not a name: ~S" (rod-string value)))
845 (unless (gethash value (id-table ctx))
846 (setf (gethash value (id-table ctx)) nil)))
848 (defun validate-nmtoken (value)
849 (unless (valid-nmtoken-p value)
850 (validity-error "(13) Name Token: not a NMTOKEN: ~S"
851 (rod-string value))))
853 (defstruct (entdef (:constructor)))
855 (defstruct (internal-entdef
856 (:include entdef)
857 (:constructor make-internal-entdef (value))
858 (:conc-name #:entdef-))
859 (value (error "missing argument") :type rod)
860 (expansion nil)
861 (external-subset-p *external-subset-p*))
863 (defstruct (external-entdef
864 (:include entdef)
865 (:constructor make-external-entdef (extid ndata))
866 (:conc-name #:entdef-))
867 (extid (error "missing argument") :type extid)
868 (ndata nil :type (or rod null)))
870 (defun validate-entity (value)
871 (unless (valid-name-p value)
872 (validity-error "(12) Entity Name: not a name: ~S" (rod-string value)))
873 (let ((def (let ((*validate*
874 ;; Similarly the entity refs are internal and
875 ;; don't need normalization ... the unparsed
876 ;; entities (and entities) aren't "references"
877 ;; -- sun/valid/sa03.xml
878 nil))
879 (get-entity-definition value :general (dtd *ctx*)))))
880 (unless (and (typep def 'external-entdef) (entdef-ndata def))
881 ;; unparsed entity
882 (validity-error "(12) Entity Name: ~S" (rod-string value)))))
884 (defun split-names (rod)
885 (flet ((whitespacep (x)
886 (or (rune= x #/U+0009)
887 (rune= x #/U+000A)
888 (rune= x #/U+000D)
889 (rune= x #/U+0020))))
890 (if (let ((n (length rod)))
891 (and (not (zerop n))
892 (or (whitespacep (rune rod 0))
893 (whitespacep (rune rod (1- n))))))
895 (split-sequence-if #'whitespacep rod :remove-empty-subseqs t))))
897 (defun zstream-base-sysid (zstream)
898 (let ((base-sysid
899 (dolist (k (zstream-input-stack zstream))
900 (let ((base-sysid (stream-name-uri (xstream-name k))))
901 (when base-sysid (return base-sysid))))))
902 base-sysid))
904 (defun absolute-uri (sysid source-stream)
905 (let ((base-sysid (zstream-base-sysid source-stream)))
906 ;; XXX is the IF correct?
907 (if base-sysid
908 (puri:merge-uris sysid base-sysid)
909 sysid)))
911 (defstruct (extid (:constructor make-extid (public system)))
912 (public nil :type (or rod null))
913 (system (error "missing argument") :type (or puri:uri null)))
915 (defun absolute-extid (source-stream extid)
916 (let ((sysid (extid-system extid))
917 (result (copy-extid extid)))
918 (setf (extid-system result) (absolute-uri sysid source-stream))
919 result))
921 (defun define-entity (source-stream name kind def)
922 (setf name (intern-name name))
923 (when (and sax:*namespace-processing* (find #/: name))
924 (wf-error source-stream "colon in entity name"))
925 (let ((table
926 (ecase kind
927 (:general (dtd-gentities (dtd *ctx*)))
928 (:parameter (dtd-pentities (dtd *ctx*))))))
929 (unless (gethash name table)
930 (when (and source-stream (handler *ctx*))
931 (report-entity (handler *ctx*) kind name def))
932 (when (typep def 'external-entdef)
933 (setf (entdef-extid def)
934 (absolute-extid source-stream (entdef-extid def))))
935 (setf (gethash name table)
936 (cons *external-subset-p* def)))))
938 (defun get-entity-definition (entity-name kind dtd)
939 (unless dtd
940 (wf-error nil "entity not defined: ~A" (rod-string entity-name)))
941 (destructuring-bind (extp &rest def)
942 (gethash entity-name
943 (ecase kind
944 (:general (dtd-gentities dtd))
945 (:parameter (dtd-pentities dtd)))
946 '(nil))
947 (when (and *validate* (standalone-p *ctx*) extp)
948 (validity-error "(02) Standalone Document Declaration: entity reference: ~S"
949 (rod-string entity-name)))
950 def))
952 (defun entity->xstream (zstream entity-name kind &optional internalp)
953 ;; `zstream' is for error messages
954 (let ((def (get-entity-definition entity-name kind (dtd *ctx*))))
955 (unless def
956 (wf-error zstream "Entity '~A' is not defined." (rod-string entity-name)))
957 (let (r)
958 (etypecase def
959 (internal-entdef
960 (when (and (standalone-p *ctx*)
961 (entdef-external-subset-p def))
962 (wf-error
963 zstream
964 "entity declared in external subset, but document is standalone"))
965 (setf r (make-rod-xstream (entdef-value def)))
966 (setf (xstream-name r)
967 (make-stream-name :entity-name entity-name
968 :entity-kind kind
969 :uri nil)))
970 (external-entdef
971 (when internalp
972 (wf-error zstream
973 "entity not internal: ~A" (rod-string entity-name)))
974 (when (entdef-ndata def)
975 (wf-error zstream
976 "reference to unparsed entity: ~A"
977 (rod-string entity-name)))
978 (setf r (xstream-open-extid (extid-using-catalog (entdef-extid def))))
979 (setf (stream-name-entity-name (xstream-name r)) entity-name
980 (stream-name-entity-kind (xstream-name r)) kind)))
981 r)))
983 (defun checked-get-entdef (name type)
984 (let ((def (get-entity-definition name type (dtd *ctx*))))
985 (unless def
986 (wf-error nil "Entity '~A' is not defined." (rod-string name)))
987 def))
989 (defun xstream-open-extid* (entity-resolver pubid sysid)
990 (let* ((stream
991 (or (funcall (or entity-resolver (constantly nil)) pubid sysid)
992 (open (uri-to-pathname sysid)
993 :element-type '(unsigned-byte 8)
994 :direction :input))))
995 (make-xstream stream
996 :name (make-stream-name :uri sysid)
997 :initial-speed 1)))
999 (defun xstream-open-extid (extid)
1000 (xstream-open-extid* (entity-resolver *ctx*)
1001 (extid-public extid)
1002 (extid-system extid)))
1004 (defun call-with-entity-expansion-as-stream (zstream cont name kind internalp)
1005 ;; `zstream' is for error messages
1006 (let ((in (entity->xstream zstream name kind internalp)))
1007 (push (stream-name-uri (xstream-name in)) (base-stack *ctx*))
1008 (unwind-protect
1009 (funcall cont in)
1010 (pop (base-stack *ctx*))
1011 (close-xstream in))))
1013 (defun ensure-dtd ()
1014 (unless (dtd *ctx*)
1015 (setf (dtd *ctx*) (make-dtd))
1016 (define-default-entities)))
1018 (defun define-default-entities ()
1019 (define-entity nil #"lt" :general (make-internal-entdef #"&#60;"))
1020 (define-entity nil #"gt" :general (make-internal-entdef #">"))
1021 (define-entity nil #"amp" :general (make-internal-entdef #"&#38;"))
1022 (define-entity nil #"apos" :general (make-internal-entdef #"'"))
1023 (define-entity nil #"quot" :general (make-internal-entdef #"\"")))
1025 (defstruct attdef
1026 ;; an attribute definition
1027 element ;name of element this attribute belongs to
1028 name ;name of attribute
1029 type ;type of attribute; either one of :CDATA, :ID, :IDREF, :IDREFS,
1030 ; :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS, or
1031 ; (:NOTATION <name>*)
1032 ; (:ENUMERATION <name>*)
1033 default ;default value of attribute:
1034 ; :REQUIRED, :IMPLIED, (:FIXED content) or (:DEFAULT content)
1035 (external-p *external-subset-p*)
1038 (defstruct elmdef
1039 ;; an element definition
1040 name ;name of the element
1041 content ;content model [*]
1042 attributes ;list of defined attributes
1043 compiled-cspec ;cons of validation function for contentspec
1044 (external-p *external-subset-p*)
1047 ;; [*] in XML it is possible to define attributes before the element
1048 ;; itself is defined and since we hang attribute definitions into the
1049 ;; relevant element definitions, the `content' slot indicates whether an
1050 ;; element was actually defined. It is NIL until set to a content model
1051 ;; when the element type declaration is processed.
1053 (defun %make-rod-hash-table ()
1054 ;; XXX with portable hash tables, this is the only way to case-sensitively
1055 ;; use rods. However, EQUALP often has horrible performance! Most Lisps
1056 ;; provide extensions for user-defined equality, we should use them! There
1057 ;; is also a home-made hash table for rods defined below, written by
1058 ;; Gilbert (I think). We could also use that one, but I would prefer the
1059 ;; first method, even if it's unportable.
1060 (make-hash-table :test
1061 #+rune-is-character 'equal
1062 #-rune-is-character 'equalp))
1064 (defstruct dtd
1065 (elements (%make-rod-hash-table)) ;elmdefs
1066 (gentities (%make-rod-hash-table)) ;general entities
1067 (pentities (%make-rod-hash-table)) ;parameter entities
1068 (notations (%make-rod-hash-table)))
1070 (defun make-dtd-cache ()
1071 (puri:make-uri-space))
1073 (defvar *cache-all-dtds* nil)
1074 (defvar *dtd-cache* (make-dtd-cache))
1076 (defun remdtd (uri dtd-cache)
1077 (setf uri (puri:intern-uri uri dtd-cache))
1078 (prog1
1079 (and (getf (puri:uri-plist uri) 'dtd) t)
1080 (puri:unintern-uri uri dtd-cache)))
1082 (defun clear-dtd-cache (dtd-cache)
1083 (puri:unintern-uri t dtd-cache))
1085 (defun getdtd (uri dtd-cache)
1086 (getf (puri:uri-plist (puri:intern-uri uri dtd-cache)) 'dtd))
1088 (defun (setf getdtd) (newval uri dtd-cache)
1089 (setf (getf (puri:uri-plist (puri:intern-uri uri dtd-cache)) 'dtd) newval)
1090 newval)
1093 ;;;;
1095 (defun find-element (name dtd)
1096 (gethash name (dtd-elements dtd)))
1098 (defun define-element (dtd element-name &optional content-model)
1099 (let ((e (find-element element-name dtd)))
1100 (cond
1101 ((null e)
1102 (prog1
1103 (setf (gethash element-name (dtd-elements dtd))
1104 (make-elmdef :name element-name :content content-model))
1105 (when content-model
1106 (sax:element-declaration (handler *ctx*) element-name content-model))))
1107 ((null content-model)
1110 (when *validate*
1111 (when (elmdef-content e)
1112 (validity-error "(05) Unique Element Type Declaration"))
1113 (when (eq content-model :EMPTY)
1114 (dolist (ad (elmdef-attributes e))
1115 (let ((type (attdef-type ad)))
1116 (when (and (listp type) (eq (car type) :NOTATION))
1117 (validity-error "(16) No Notation on Empty Element: ~S"
1118 (rod-string element-name)))))))
1119 (sax:element-declaration (handler *ctx*) element-name content-model)
1120 (setf (elmdef-content e) content-model)
1121 (setf (elmdef-external-p e) *external-subset-p*)
1122 e))))
1124 (defvar *redefinition-warning* nil)
1126 (defun define-attribute (dtd element name type default)
1127 (let ((adef (make-attdef :element element
1128 :name name
1129 :type type
1130 :default default))
1131 (e (or (find-element element dtd)
1132 (define-element dtd element))))
1133 (when (and *validate* (listp default))
1134 (unless (eq (attdef-type adef) :CDATA)
1135 (setf (second default) (canon-not-cdata-attval (second default))))
1136 (validate-attribute* *ctx* adef (second default)))
1137 (cond ((find-attribute e name)
1138 (when *redefinition-warning*
1139 (warn "Attribute \"~A\" of \"~A\" not redefined."
1140 (rod-string name)
1141 (rod-string element))))
1143 (when *validate*
1144 (when (eq type :ID)
1145 (when (find :ID (elmdef-attributes e) :key #'attdef-type)
1146 (validity-error "(09) One ID per Element Type: element ~A"
1147 (rod-string element)))
1148 (unless (member default '(:REQUIRED :IMPLIED))
1149 (validity-error "(10) ID Attribute Default: ~A"
1150 (rod-string element))))
1151 (flet ((notationp (type)
1152 (and (listp type) (eq (car type) :NOTATION))))
1153 (when (notationp type)
1154 (when (find-if #'notationp (elmdef-attributes e)
1155 :key #'attdef-type)
1156 (validity-error "(15) One Notation Per Element Type: ~S"
1157 (rod-string element)))
1158 (when (eq (elmdef-content e) :EMPTY)
1159 (validity-error "(16) No Notation on Empty Element: ~S"
1160 (rod-string element))))))
1161 (sax:attribute-declaration (handler *ctx*) element name type default)
1162 (push adef (elmdef-attributes e))))))
1164 (defun find-attribute (elmdef name)
1165 (find name (elmdef-attributes elmdef) :key #'attdef-name :test #'rod=))
1167 (defun define-notation (dtd name id)
1168 (let ((ns (dtd-notations dtd)))
1169 (when (gethash name ns)
1170 (validity-error "(24) Unique Notation Name: ~S" (rod-string name)))
1171 (setf (gethash name ns) id)))
1173 (defun find-notation (name dtd)
1174 (gethash name (dtd-notations dtd)))
1176 ;;;; ---------------------------------------------------------------------------
1177 ;;;; z streams and lexer
1178 ;;;;
1180 (defstruct zstream
1181 token-category
1182 token-semantic
1183 input-stack)
1185 (defun call-with-zstream (fn zstream)
1186 (unwind-protect
1187 (funcall fn zstream)
1188 (dolist (input (zstream-input-stack zstream))
1189 (cond #-x&y-streams-are-stream
1190 ((xstream-p input)
1191 (close-xstream input))
1192 #+x&y-streams-are-stream
1193 ((streamp input)
1194 (close input))))))
1196 (defmacro with-zstream ((zstream &rest args) &body body)
1197 `(call-with-zstream (lambda (,zstream) ,@body)
1198 (make-zstream ,@args)))
1200 (defun read-token (input)
1201 (cond ((zstream-token-category input)
1202 (multiple-value-prog1
1203 (values (zstream-token-category input)
1204 (zstream-token-semantic input))
1205 (setf (zstream-token-category input) nil
1206 (zstream-token-semantic input) nil)))
1208 (read-token-2 input))))
1210 (defun peek-token (input)
1211 (cond ((zstream-token-category input)
1212 (values
1213 (zstream-token-category input)
1214 (zstream-token-semantic input)))
1216 (multiple-value-bind (c s) (read-token input)
1217 (setf (zstream-token-category input) c
1218 (zstream-token-semantic input) s))
1219 (values (zstream-token-category input)
1220 (zstream-token-semantic input)))))
1222 (defun read-token-2 (input)
1223 (cond ((null (zstream-input-stack input))
1224 (values :eof nil))
1226 (let ((c (peek-rune (car (zstream-input-stack input)))))
1227 (cond ((eq c :eof)
1228 (cond ((eq (cadr (zstream-input-stack input)) :stop)
1229 (values :eof nil))
1231 (close-xstream (pop (zstream-input-stack input)))
1232 (if (null (zstream-input-stack input))
1233 (values :eof nil)
1234 (values :S nil) ;fake #x20 after PE expansion
1235 ))))
1237 (read-token-3 input)))))))
1239 (defvar *data-behaviour*
1240 ) ;either :DTD or :DOC
1242 (defun read-token-3 (zinput)
1243 (let ((input (car (zstream-input-stack zinput))))
1244 ;; PI Comment
1245 (let ((c (read-rune input)))
1246 (cond
1247 ;; first the common tokens
1248 ((rune= #/< c)
1249 (read-token-after-|<| zinput input))
1250 ;; now dispatch
1252 (ecase *data-behaviour*
1253 (:DTD
1254 (cond ((rune= #/\[ c) :\[)
1255 ((rune= #/\] c) :\])
1256 ((rune= #/\( c) :\()
1257 ((rune= #/\) c) :\))
1258 ((rune= #/\| c) :\|)
1259 ((rune= #/\> c) :\>)
1260 ((rune= #/\" c) :\")
1261 ((rune= #/\' c) :\')
1262 ((rune= #/\, c) :\,)
1263 ((rune= #/\? c) :\?)
1264 ((rune= #/\* c) :\*)
1265 ((rune= #/\+ c) :\+)
1266 ((name-rune-p c)
1267 (unread-rune c input)
1268 (values :nmtoken (read-name-token input)))
1269 ((rune= #/# c)
1270 (let ((q (read-name-token input)))
1271 (cond ((rod= q '#.(string-rod "REQUIRED")) :|#REQUIRED|)
1272 ((rod= q '#.(string-rod "IMPLIED")) :|#IMPLIED|)
1273 ((rod= q '#.(string-rod "FIXED")) :|#FIXED|)
1274 ((rod= q '#.(string-rod "PCDATA")) :|#PCDATA|)
1276 (wf-error zinput "Unknown token: ~S." q)))))
1277 ((or (rune= c #/U+0020)
1278 (rune= c #/U+0009)
1279 (rune= c #/U+000D)
1280 (rune= c #/U+000A))
1281 (values :S nil))
1282 ((rune= #/% c)
1283 (cond ((name-start-rune-p (peek-rune input))
1284 ;; an entity reference
1285 (read-pe-reference zinput))
1287 (values :%))))
1289 (wf-error zinput "Unexpected character ~S." c))))
1290 (:DOC
1291 (cond
1292 ((rune= c #/&)
1293 (multiple-value-bind (kind data) (read-entity-like input)
1294 (cond ((eq kind :ENTITY-REFERENCE)
1295 (values :ENTITY-REF data))
1296 ((eq kind :CHARACTER-REFERENCE)
1297 (values :CDATA
1298 (with-rune-collector (collect)
1299 (%put-unicode-char data collect)))))))
1301 (unread-rune c input)
1302 (values :CDATA (read-cdata input)))))))))))
1304 (definline check-rune (input actual expected)
1305 (unless (eql actual expected)
1306 (wf-error input "expected #/~A but found #/~A"
1307 (rune-char expected)
1308 (rune-char actual))))
1310 (defun read-pe-reference (zinput)
1311 (let* ((input (car (zstream-input-stack zinput)))
1312 (nam (read-name-token input)))
1313 (check-rune input #/\; (read-rune input))
1314 (cond (*expand-pe-p*
1315 ;; no external entities here!
1316 (let ((i2 (entity->xstream zinput nam :parameter)))
1317 (zstream-push i2 zinput))
1318 (values :S nil) ;space before inserted PE expansion.
1321 (values :PE-REFERENCE nam)) )))
1323 (defun read-token-after-|<| (zinput input)
1324 (let ((d (read-rune input)))
1325 (cond ((eq d :eof)
1326 (eox input "EOF after '<'"))
1327 ((rune= #/! d)
1328 (read-token-after-|<!| input))
1329 ((rune= #/? d)
1330 (multiple-value-bind (target content) (read-pi input)
1331 (cond ((rod= target '#.(string-rod "xml"))
1332 (values :xml-decl (cons target content)))
1333 ((rod-equal target '#.(string-rod "XML"))
1334 (wf-error zinput
1335 "You lost -- no XML processing instructions."))
1336 ((and sax:*namespace-processing* (position #/: target))
1337 (wf-error zinput
1338 "Processing instruction target ~S is not a ~
1339 valid NcName."
1340 (mu target)))
1342 (values :PI (cons target content))))))
1343 ((eq *data-behaviour* :DTD)
1344 (unread-rune d input)
1345 (unless (or (rune= #// d) (name-start-rune-p d))
1346 (wf-error zinput "Expected '!' or '?' after '<' in DTD."))
1347 (values :seen-< nil))
1348 ((rune= #// d)
1349 (let ((c (peek-rune input)))
1350 (cond ((name-start-rune-p c)
1351 (read-tag-2 zinput input :etag))
1353 (wf-error zinput
1354 "Expecting name start rune after \"</\".")))))
1355 ((name-start-rune-p d)
1356 (unread-rune d input)
1357 (read-tag-2 zinput input :stag))
1359 (wf-error zinput "Expected '!' or '?' after '<' in DTD.")))))
1361 (defun read-token-after-|<!| (input)
1362 (let ((d (read-rune input)))
1363 (cond ((eq d :eof)
1364 (eox input "EOF after \"<!\"."))
1365 ((name-start-rune-p d)
1366 (unread-rune d input)
1367 (let ((name (read-name-token input)))
1368 (cond ((rod= name '#.(string-rod "ELEMENT")) :|<!ELEMENT|)
1369 ((rod= name '#.(string-rod "ENTITY")) :|<!ENTITY|)
1370 ((rod= name '#.(string-rod "ATTLIST")) :|<!ATTLIST|)
1371 ((rod= name '#.(string-rod "NOTATION")) :|<!NOTATION|)
1372 ((rod= name '#.(string-rod "DOCTYPE")) :|<!DOCTYPE|)
1374 (wf-error input"`<!~A' unknown." (rod-string name))))))
1375 ((rune= #/\[ d)
1376 (values :|<![| nil))
1377 ((rune= #/- d)
1378 (setf d (read-rune input))
1379 (cond ((rune= #/- d)
1380 (values
1381 :COMMENT
1382 (read-comment-content input)))
1384 (wf-error input"Bad character ~S after \"<!-\"" d))))
1386 (wf-error input "Bad character ~S after \"<!\"" d)))))
1388 (definline read-S? (input)
1389 (while (member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D)
1390 :test #'eql)
1391 (consume-rune input)))
1393 (defun read-attribute-list (zinput input imagine-space-p)
1394 (cond ((or imagine-space-p
1395 (let ((c (peek-rune input)))
1396 (and (not (eq c :eof))
1397 (space-rune-p c))))
1398 (read-S? input)
1399 (cond ((eq (peek-rune input) :eof)
1400 nil)
1401 ((name-start-rune-p (peek-rune input))
1402 (cons (read-attribute zinput input)
1403 (read-attribute-list zinput input nil)))
1405 nil)))
1407 nil)))
1409 (defun read-entity-like (input)
1410 "Read an entity reference off the xstream `input'. Returns two values:
1411 either :ENTITY-REFERENCE <interned-rod> in case of a named entity
1412 or :CHARACTER-REFERENCE <integer> in case of character references.
1413 The initial #\\& is considered to be consumed already."
1414 (let ((c (peek-rune input)))
1415 (cond ((eq c :eof)
1416 (eox input "EOF after '&'"))
1417 ((rune= c #/#)
1418 (values :CHARACTER-REFERENCE (read-character-reference input)))
1420 (unless (name-start-rune-p (peek-rune input))
1421 (wf-error input "Expecting name after &."))
1422 (let ((name (read-name-token input)))
1423 (setf c (read-rune input))
1424 (unless (rune= c #/\;)
1425 (wf-error input "Expected \";\"."))
1426 (values :ENTITY-REFERENCE name))))))
1428 (defun read-tag-2 (zinput input kind)
1429 (let ((name (read-name-token input))
1430 (atts nil))
1431 (setf atts (read-attribute-list zinput input nil))
1433 ;; check for double attributes
1434 (do ((q atts (cdr q)))
1435 ((null q))
1436 (cond ((find (caar q) (cdr q) :key #'car)
1437 (wf-error zinput "Attribute ~S has two definitions in element ~S."
1438 (rod-string (caar q))
1439 (rod-string name)))))
1441 (cond ((eq (peek-rune input) #/>)
1442 (consume-rune input)
1443 (values kind (cons name atts)))
1444 ((eq (peek-rune input) #//)
1445 (consume-rune input)
1446 (check-rune input #/> (read-rune input))
1447 (values :ztag (cons name atts)))
1449 (wf-error zinput "syntax error in read-tag-2.")) )))
1451 (defun read-attribute (zinput input)
1452 (unless (name-start-rune-p (peek-rune input))
1453 (wf-error zinput "Expected name."))
1454 ;; arg thanks to the post mortem nature of name space declarations,
1455 ;; we could only process the attribute values post mortem.
1456 (let ((name (read-name-token input)))
1457 (while (let ((c (peek-rune input)))
1458 (and (not (eq c :eof))
1459 (or (rune= c #/U+0020)
1460 (rune= c #/U+0009)
1461 (rune= c #/U+000A)
1462 (rune= c #/U+000D))))
1463 (consume-rune input))
1464 (unless (eq (read-rune input) #/=)
1465 (wf-error zinput "Expected \"=\"."))
1466 (while (let ((c (peek-rune input)))
1467 (and (not (eq c :eof))
1468 (or (rune= c #/U+0020)
1469 (rune= c #/U+0009)
1470 (rune= c #/U+000A)
1471 (rune= c #/U+000D))))
1472 (consume-rune input))
1473 (cons name (read-att-value-2 input))))
1475 (defun canon-not-cdata-attval (value)
1476 ;; | If the declared value is not CDATA, then the XML processor must
1477 ;; | further process the normalized attribute value by discarding any
1478 ;; | leading and trailing space (#x20) characters, and by replacing
1479 ;; | sequences of space (#x20) characters by a single space (#x20)
1480 ;; | character.
1481 (with-rune-collector (collect)
1482 (let ((gimme-20 nil)
1483 (anything-seen-p nil))
1484 (map nil (lambda (c)
1485 (cond ((rune= c #/u+0020)
1486 (setf gimme-20 t))
1488 (when (and anything-seen-p gimme-20)
1489 (collect #/u+0020))
1490 (setf gimme-20 nil)
1491 (setf anything-seen-p t)
1492 (collect c))))
1493 value))))
1495 (definline data-rune-p (rune)
1496 ;; Any Unicode character, excluding FFFE, and FFFF.
1497 ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
1498 (let ((c (rune-code rune)))
1499 (or (= c #x9) (= c #xA) (= c #xD)
1500 (<= #x20 c #xD7FF)
1501 #+rune-is-utf-16 (<= #xD800 c #xDFFF)
1502 (<= #xE000 c #xFFFD)
1503 #-rune-is-utf-16 (<= #x10000 c #x10FFFF))))
1505 (defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
1506 (with-rune-collector-2 (collect)
1507 (labels ((muffle (input delim)
1508 (let (c)
1509 (loop
1510 (setf c (read-rune input))
1511 (cond ((eql delim c)
1512 (return))
1513 ((eq c :eof)
1514 (eox input "EOF"))
1515 ((rune= c #/&)
1516 (setf c (peek-rune input))
1517 (cond ((eql c :eof)
1518 (eox input))
1519 ((rune= c #/#)
1520 (let ((c (read-character-reference input)))
1521 (%put-unicode-char c collect)))
1523 (unless (name-start-rune-p (peek-rune input))
1524 (wf-error zinput "Expecting name after &."))
1525 (let ((name (read-name-token input)))
1526 (setf c (read-rune input))
1527 (check-rune input c #/\;)
1528 (ecase mode
1529 (:ATT
1530 (recurse-on-entity
1531 zinput name :general
1532 (lambda (zinput)
1533 (muffle (car (zstream-input-stack zinput))
1534 :eof))
1536 (:ENT
1537 ;; bypass, but never the less we
1538 ;; need to check for legal
1539 ;; syntax.
1540 ;; Must it be defined?
1541 ;; allerdings: unparsed sind verboten
1542 (collect #/&)
1543 (map nil (lambda (x) (collect x)) name)
1544 (collect #/\; )))))))
1545 ((and (eq mode :ENT) (rune= c #/%))
1546 (let ((d (peek-rune input)))
1547 (when (eq d :eof)
1548 (eox input))
1549 (unless (name-start-rune-p d)
1550 (wf-error zinput "Expecting name after %.")))
1551 (let ((name (read-name-token input)))
1552 (setf c (read-rune input))
1553 (check-rune input c #/\;)
1554 (cond (*expand-pe-p*
1555 (recurse-on-entity
1556 zinput name :parameter
1557 (lambda (zinput)
1558 (muffle (car (zstream-input-stack zinput))
1559 :eof))))
1561 (wf-error zinput "No PE here.")))))
1562 ((and (eq mode :ATT) (rune= c #/<))
1563 (wf-error zinput "unexpected #\/<"))
1564 ((and canon-space-p (space-rune-p c))
1565 (collect #/space))
1566 ((not (data-rune-p c))
1567 (wf-error zinput "illegal char: ~S." c))
1569 (collect c)))))))
1570 (declare (dynamic-extent #'muffle))
1571 (muffle input (or delim
1572 (let ((delim (read-rune input)))
1573 (unless (member delim '(#/\" #/\') :test #'eql)
1574 (wf-error zinput "invalid attribute delimiter"))
1575 delim))))))
1577 (defun read-character-reference (input)
1578 ;; The #/& is already read
1579 (let ((res
1580 (let ((c (read-rune input)))
1581 (check-rune input c #/#)
1582 (setq c (read-rune input))
1583 (cond ((eql c :eof)
1584 (eox input))
1585 ((eql c #/x)
1586 ;; hexadecimal
1587 (setq c (read-rune input))
1588 (when (eql c :eof)
1589 (eox input))
1590 (unless (digit-rune-p c 16)
1591 (wf-error input "garbage in character reference"))
1592 (prog1
1593 (parse-integer
1594 (with-output-to-string (sink)
1595 (write-char (rune-char c) sink)
1596 (while (progn
1597 (setq c (read-rune input))
1598 (when (eql c :eof)
1599 (eox input))
1600 (digit-rune-p c 16))
1601 (write-char (rune-char c) sink)))
1602 :radix 16)
1603 (check-rune input c #/\;)))
1604 ((rune<= #/0 c #/9)
1605 ;; decimal
1606 (prog1
1607 (parse-integer
1608 (with-output-to-string (sink)
1609 (write-char (rune-char c) sink)
1610 (while (progn
1611 (setq c (read-rune input))
1612 (when (eql c :eof)
1613 (eox input))
1614 (rune<= #/0 c #/9))
1615 (write-char (rune-char c) sink)))
1616 :radix 10)
1617 (check-rune input c #/\;)))
1619 (wf-error input "Bad char in numeric character entity."))))))
1620 (unless (code-data-char-p res)
1621 (wf-error
1622 input
1623 "expansion of numeric character reference (#x~X) is no data char."
1624 res))
1625 res))
1627 (defun read-pi (input)
1628 ;; "<?" is already read
1629 (let (name)
1630 (let ((c (peek-rune input)))
1631 (unless (name-start-rune-p c)
1632 (wf-error input "Expecting name after '<?'"))
1633 (setf name (read-name-token input)))
1634 (cond
1635 ((member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D)
1636 :test #'eql)
1637 (values name (read-pi-content input)))
1639 (unless (and (eql (read-rune input) #/?)
1640 (eql (read-rune input) #/>))
1641 (wf-error input "malformed processing instruction"))
1642 (values name "")))))
1644 (defun read-pi-content (input)
1645 (read-S? input)
1646 (let (d)
1647 (with-rune-collector (collect)
1648 (block nil
1649 (tagbody
1650 state-1
1651 (setf d (read-rune input))
1652 (when (eq d :eof)
1653 (eox input))
1654 (unless (data-rune-p d)
1655 (wf-error input "Illegal char: ~S." d))
1656 (when (rune= d #/?) (go state-2))
1657 (collect d)
1658 (go state-1)
1659 state-2 ;; #/? seen
1660 (setf d (read-rune input))
1661 (when (eq d :eof)
1662 (eox input))
1663 (unless (data-rune-p d)
1664 (wf-error input "Illegal char: ~S." d))
1665 (when (rune= d #/>) (return))
1666 (when (rune= d #/?)
1667 (collect #/?)
1668 (go state-2))
1669 (collect #/?)
1670 (collect d)
1671 (go state-1))))))
1673 (defun read-comment-content (input &aux d)
1674 (with-rune-collector (collect)
1675 (block nil
1676 (tagbody
1677 state-1
1678 (setf d (read-rune input))
1679 (when (eq d :eof)
1680 (eox input))
1681 (unless (data-rune-p d)
1682 (wf-error input "Illegal char: ~S." d))
1683 (when (rune= d #/-) (go state-2))
1684 (collect d)
1685 (go state-1)
1686 state-2 ;; #/- seen
1687 (setf d (read-rune input))
1688 (when (eq d :eof)
1689 (eox input))
1690 (unless (data-rune-p d)
1691 (wf-error input "Illegal char: ~S." d))
1692 (when (rune= d #/-) (go state-3))
1693 (collect #/-)
1694 (collect d)
1695 (go state-1)
1696 state-3 ;; #/- #/- seen
1697 (setf d (read-rune input))
1698 (when (eq d :eof)
1699 (eox input))
1700 (unless (data-rune-p d)
1701 (wf-error input "Illegal char: ~S." d))
1702 (when (rune= d #/>) (return))
1703 (wf-error input "'--' not allowed in a comment")
1704 (when (rune= d #/-)
1705 (collect #/-)
1706 (go state-3))
1707 (collect #/-)
1708 (collect #/-)
1709 (collect d)
1710 (go state-1)))))
1712 (defun read-cdata-sect (input &aux d)
1713 ;; <![CDATA[ is already read
1714 ;; read anything up to ]]>
1715 (with-rune-collector (collect)
1716 (block nil
1717 (tagbody
1718 state-1
1719 (setf d (read-rune input))
1720 (when (eq d :eof)
1721 (eox input))
1722 (unless (data-rune-p d)
1723 (wf-error input "Illegal char: ~S." d))
1724 (when (rune= d #/\]) (go state-2))
1725 (collect d)
1726 (go state-1)
1727 state-2 ;; #/] seen
1728 (setf d (read-rune input))
1729 (when (eq d :eof)
1730 (eox input))
1731 (unless (data-rune-p d)
1732 (wf-error input "Illegal char: ~S." d))
1733 (when (rune= d #/\]) (go state-3))
1734 (collect #/\])
1735 (collect d)
1736 (go state-1)
1737 state-3 ;; #/\] #/\] seen
1738 (setf d (read-rune input))
1739 (when (eq d :eof)
1740 (eox input))
1741 (unless (data-rune-p d)
1742 (wf-error input "Illegal char: ~S." d))
1743 (when (rune= d #/>)
1744 (return))
1745 (when (rune= d #/\])
1746 (collect #/\])
1747 (go state-3))
1748 (collect #/\])
1749 (collect #/\])
1750 (collect d)
1751 (go state-1)))))
1753 ;; some character categories
1755 (defun space-rune-p (rune)
1756 (declare (type rune rune))
1757 (or (rune= rune #/U+0020)
1758 (rune= rune #/U+0009)
1759 (rune= rune #/U+000A)
1760 (rune= rune #/U+000D)))
1762 (defun code-data-char-p (c)
1763 ;; Any Unicode character, excluding FFFE, and FFFF.
1764 ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
1765 (or (= c #x9) (= c #xA) (= c #xD)
1766 (<= #x20 c #xD7FF)
1767 #+rune-is-utf-16 (<= #xD800 c #xDFFF)
1768 (<= #xE000 c #xFFFD)
1769 #-rune-is-utf-16 (<= #x10000 c #x10FFFF)))
1771 (defun pubid-char-p (c)
1772 (or (rune= c #/u+0020) (rune= c #/u+000D) (rune= c #/u+000A)
1773 (rune<= #/a c #/z)
1774 (rune<= #/A c #/Z)
1775 (rune<= #/0 c #/9)
1776 (member c '(#/- #/' #/\( #/\) #/+ #/, #/. #//
1777 #/: #/= #/? #/\; #/! #/* #/#
1778 #/@ #/$ #/_ #/%))))
1781 (defun expect (input category)
1782 (multiple-value-bind (cat sem) (read-token input)
1783 (unless (eq cat category)
1784 (wf-error input "Expected ~S saw ~S [~S]" category cat sem))
1785 (values cat sem)))
1787 (defun consume-token (input)
1788 (read-token input))
1790 ;;;; ---------------------------------------------------------------------------
1791 ;;;; Parser
1792 ;;;;
1794 (defun p/S (input)
1795 ;; S ::= (#x20 | #x9 | #xD | #xA)+
1796 (expect input :S)
1797 (while (eq (peek-token input) :S)
1798 (consume-token input)))
1800 (defun p/S? (input)
1801 ;; S ::= (#x20 | #x9 | #xD | #xA)+
1802 (while (eq (peek-token input) :S)
1803 (consume-token input)))
1805 (defun p/nmtoken (input)
1806 (nth-value 1 (expect input :nmtoken)))
1808 (defun p/name (input)
1809 (let ((result (p/nmtoken input)))
1810 (unless (name-start-rune-p (elt result 0))
1811 (wf-error input "Expected name."))
1812 result))
1814 (defun p/attlist-decl (input)
1815 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name (S AttDef)* S? '>'
1816 (let (elm-name)
1817 (expect input :|<!ATTLIST|)
1818 (p/S input)
1819 (setf elm-name (p/nmtoken input))
1820 (loop
1821 (let ((tok (read-token input)))
1822 (case tok
1824 (p/S? input)
1825 (cond ((eq (peek-token input) :>)
1826 (consume-token input)
1827 (return))
1829 (multiple-value-bind (name type default) (p/attdef input)
1830 (define-attribute (dtd *ctx*) elm-name name type default)) )))
1832 (return))
1833 (otherwise
1834 (wf-error input
1835 "Expected either another AttDef or end of \"<!ATTLIST\". -- saw ~S."
1836 tok)))))))
1838 (defun p/attdef (input)
1839 ;; [53] AttDef ::= Name S AttType S DefaultDecl
1840 (let (name type default)
1841 (setf name (p/nmtoken input))
1842 (p/S input)
1843 (setf type (p/att-type input))
1844 (p/S input)
1845 (setf default (p/default-decl input))
1846 (values name type default)))
1848 (defun p/list (input item-parser delimiter)
1849 ;; Parse something like S? <item> (S? <delimiter> <item>)* S?
1851 (declare (type function item-parser))
1852 (let (res)
1853 (p/S? input)
1854 (setf res (list (funcall item-parser input)))
1855 (loop
1856 (p/S? input)
1857 (cond ((eq (peek-token input) delimiter)
1858 (consume-token input)
1859 (p/S? input)
1860 (push (funcall item-parser input) res))
1862 (return))))
1863 (p/S? input)
1864 (reverse res)))
1866 (defun p/att-type (input)
1867 ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType
1868 ;; [55] StringType ::= 'CDATA'
1869 ;; [56] TokenizedType ::= 'ID' /*VC: ID */
1870 ;; /*VC: One ID per Element Type */
1871 ;; /*VC: ID Attribute Default */
1872 ;; | 'IDREF' /*VC: IDREF */
1873 ;; | 'IDREFS' /*VC: IDREF */
1874 ;; | 'ENTITY' /*VC: Entity Name */
1875 ;; | 'ENTITIES' /*VC: Entity Name */
1876 ;; | 'NMTOKEN' /*VC: Name Token */
1877 ;; | 'NMTOKENS' /*VC: Name Token */
1878 ;; [57] EnumeratedType ::= NotationType | Enumeration
1879 ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
1880 ;; /* VC: Notation Attributes */
1881 ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */
1882 (multiple-value-bind (cat sem) (read-token input)
1883 (cond ((eq cat :nmtoken)
1884 (cond ((rod= sem '#.(string-rod "CDATA")) :CDATA)
1885 ((rod= sem '#.(string-rod "ID")) :ID)
1886 ((rod= sem '#.(string-rod "IDREF")) :IDREFS)
1887 ((rod= sem '#.(string-rod "IDREFS")) :IDREFS)
1888 ((rod= sem '#.(string-rod "ENTITY")) :ENTITY)
1889 ((rod= sem '#.(string-rod "ENTITIES")) :ENTITIES)
1890 ((rod= sem '#.(string-rod "NMTOKEN")) :NMTOKEN)
1891 ((rod= sem '#.(string-rod "NMTOKENS")) :NMTOKENS)
1892 ((rod= sem '#.(string-rod "NOTATION"))
1893 (let (names)
1894 (p/S input)
1895 (expect input :\()
1896 (setf names (p/list input #'p/nmtoken :\| ))
1897 (expect input :\))
1898 (when *validate*
1899 (setf (referenced-notations *ctx*)
1900 (append names (referenced-notations *ctx*))))
1901 (cons :NOTATION names)))
1903 (wf-error input "In p/att-type: ~S ~S." cat sem))))
1904 ((eq cat :\()
1905 ;; XXX Die Nmtoken-Syntax pruefen wir derzeit nur beim Validieren.
1906 (let (names)
1907 ;;(expect input :\()
1908 (setf names (p/list input #'p/nmtoken :\| ))
1909 (expect input :\))
1910 (cons :ENUMERATION names)))
1912 (wf-error input "In p/att-type: ~S ~S." cat sem)) )))
1914 (defun p/default-decl (input)
1915 ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1916 ;; | (('#FIXED' S)? AttValue) /* VC: Required Attribute */
1918 ;; /* VC: Attribute Default Legal */
1919 ;; /* WFC: No < in Attribute Values */
1920 ;; /* VC: Fixed Attribute Default */
1921 (multiple-value-bind (cat sem) (peek-token input)
1922 (cond ((eq cat :|#REQUIRED|)
1923 (consume-token input) :REQUIRED)
1924 ((eq cat :|#IMPLIED|)
1925 (consume-token input) :IMPLIED)
1926 ((eq cat :|#FIXED|)
1927 (consume-token input)
1928 (p/S input)
1929 (list :FIXED (p/att-value input)))
1930 ((or (eq cat :\') (eq cat :\"))
1931 (list :DEFAULT (p/att-value input)))
1933 (wf-error input "p/default-decl: ~S ~S." cat sem)) )))
1934 ;;;;
1936 ;; [70] EntityDecl ::= GEDecl | PEDecl
1937 ;; [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
1938 ;; [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
1939 ;; [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1940 ;; [74] PEDef ::= EntityValue | ExternalID
1941 ;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1942 ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
1943 ;; [76] NDataDecl ::= S 'NDATA' S Name /* VC: Notation Declared */
1945 (defun p/entity-decl (input)
1946 (let (name def kind)
1947 (expect input :|<!ENTITY|)
1948 (p/S input)
1949 (cond ((eq (peek-token input) :%)
1950 (setf kind :parameter)
1951 (consume-token input)
1952 (p/S input))
1954 (setf kind :general)))
1955 (setf name (p/name input))
1956 (p/S input)
1957 (setf def (p/entity-def input kind))
1958 (define-entity input name kind def)
1959 (p/S? input)
1960 (expect input :\>)))
1962 (defun report-entity (h kind name def)
1963 (etypecase def
1964 (external-entdef
1965 (let ((extid (entdef-extid def))
1966 (ndata (entdef-ndata def)))
1967 (if ndata
1968 (sax:unparsed-entity-declaration h
1969 name
1970 (extid-public extid)
1971 (uri-rod (extid-system extid))
1972 ndata)
1973 (sax:external-entity-declaration h
1974 kind
1975 name
1976 (extid-public extid)
1977 (uri-rod (extid-system extid))))))
1978 (internal-entdef
1979 (sax:internal-entity-declaration h kind name (entdef-value def)))))
1981 (defun p/entity-def (input kind)
1982 (multiple-value-bind (cat sem) (peek-token input)
1983 (cond ((member cat '(:\" :\'))
1984 (make-internal-entdef (p/entity-value input)))
1985 ((and (eq cat :nmtoken)
1986 (or (rod= sem '#.(string-rod "SYSTEM"))
1987 (rod= sem '#.(string-rod "PUBLIC"))))
1988 (let (extid ndata)
1989 (setf extid (p/external-id input nil))
1990 (when (eq kind :general) ;NDATA allowed at all?
1991 (cond ((eq (peek-token input) :S)
1992 (p/S? input)
1993 (when (and (eq (peek-token input) :nmtoken)
1994 (rod= (nth-value 1 (peek-token input))
1995 '#.(string-rod "NDATA")))
1996 (consume-token input)
1997 (p/S input)
1998 (setf ndata (p/nmtoken input))
1999 (when *validate*
2000 (push ndata (referenced-notations *ctx*)))))))
2001 (make-external-entdef extid ndata)))
2003 (wf-error input "p/entity-def: ~S / ~S." cat sem)) )))
2005 (defun p/entity-value (input)
2006 (let ((delim (if (eq (read-token input) :\") #/\" #/\')))
2007 (read-att-value input
2008 (car (zstream-input-stack input))
2009 :ENT
2011 delim)))
2013 (defun p/att-value (input)
2014 (let ((delim (if (eq (read-token input) :\") #/\" #/\')))
2015 (read-att-value input
2016 (car (zstream-input-stack input))
2017 :ATT
2019 delim)))
2021 (defun p/external-id (input &optional (public-only-ok-p nil))
2022 ;; xxx public-only-ok-p
2023 (multiple-value-bind (cat sem) (read-token input)
2024 (cond ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "SYSTEM")))
2025 (p/S input)
2026 (make-extid nil (p/system-literal input)))
2027 ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "PUBLIC")))
2028 (let (pub sys)
2029 (p/S input)
2030 (setf pub (p/pubid-literal input))
2031 (when (eq (peek-token input) :S)
2032 (p/S input)
2033 (when (member (peek-token input) '(:\" :\'))
2034 (setf sys (p/system-literal input))))
2035 (when (and (not public-only-ok-p)
2036 (null sys))
2037 (wf-error input "System identifier needed for this PUBLIC external identifier."))
2038 (make-extid pub sys)))
2040 (wf-error input "Expected external-id: ~S / ~S." cat sem)))))
2043 ;; [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2044 ;; [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2045 ;; [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
2046 ;; | [-'()+,./:=?;!*#@$_%]
2048 (defun p/id (input)
2049 (multiple-value-bind (cat) (read-token input)
2050 (cond ((member cat '(:\" :\'))
2051 (let ((delim (if (eq cat :\") #/\" #/\')))
2052 (with-rune-collector (collect)
2053 (loop
2054 (let ((c (read-rune (car (zstream-input-stack input)))))
2055 (cond ((eq c :eof)
2056 (eox input "EOF in system literal."))
2057 ((rune= c delim)
2058 (return))
2060 (collect c))))))))
2062 (wf-error input "Expect either \" or \'.")))))
2064 ;; it is important to cache the orginal URI rod, since the re-serialized
2065 ;; uri-string can be different from the one parsed originally.
2066 (defun uri-rod (uri)
2067 (if uri
2068 (or (getf (puri:uri-plist uri) 'original-rod)
2069 (rod (puri:render-uri uri nil)))
2070 nil))
2072 (defun safe-parse-uri (str)
2073 ;; puri doesn't like strings starting with file:///, although that is a very
2074 ;; common is practise. Cut it away, we don't distinguish between scheme
2075 ;; :FILE and NIL anway.
2076 (when (eql (search "file://" str) 0)
2077 (setf str (subseq str (length "file://"))))
2078 (puri:parse-uri (coerce str 'simple-string)))
2080 (defun p/system-literal (input)
2081 (let* ((rod (p/id input))
2082 (result (safe-parse-uri (rod-string rod))))
2083 (setf (getf (puri:uri-plist result) 'original-rod) rod)
2084 result))
2086 (defun p/pubid-literal (input)
2087 (let ((result (p/id input)))
2088 (unless (every #'pubid-char-p result)
2089 (wf-error input "Illegal pubid: ~S." (rod-string result)))
2090 result))
2093 ;;;;
2095 (defun p/element-decl (input)
2096 (let (name content)
2097 (expect input :|<!ELEMENT|)
2098 (p/S input)
2099 (setf name (p/nmtoken input))
2100 (p/S input)
2101 (setf content (normalize-mixed-cspec (p/cspec input)))
2102 (unless (legal-content-model-p content *validate*)
2103 (wf-error input "Malformed or invalid content model: ~S." (mu content)))
2104 (p/S? input)
2105 (expect input :\>)
2106 (define-element (dtd *ctx*) name content)
2107 (list :element name content)))
2109 (defun maybe-compile-cspec (e)
2110 (or (elmdef-compiled-cspec e)
2111 (setf (elmdef-compiled-cspec e)
2112 (let ((cspec (elmdef-content e)))
2113 (unless cspec
2114 (validity-error "(03) Element Valid: no definition for ~A"
2115 (rod-string (elmdef-name e))))
2116 (multiple-value-call #'cons
2117 (compile-cspec cspec (standalone-check-necessary-p e)))))))
2119 (defun make-root-model (name)
2120 (cons (lambda (actual-name)
2121 (if (rod= actual-name name)
2122 (constantly :dummy)
2123 nil))
2124 (constantly t)))
2126 ;;; content spec validation:
2128 ;;; Given a `contentspec', COMPILE-CSPEC returns as multiple values two
2129 ;;; functions A and B of one argument to be called for every
2130 ;;; A. child element
2131 ;;; B. text child node
2133 ;;; Function A will be called with
2134 ;;; - the element name rod as its argument. If that element may appear
2135 ;;; at the current position, a new function to be called for the next
2136 ;;; child is returned. Otherwise NIL is returned.
2137 ;;; - argument NIL at the end of the element, it must then return T or NIL
2138 ;;; to indicate whether the end tag is valid.
2140 ;;; Function B will be called with the character data rod as its argument, it
2141 ;;; returns a boolean indicating whether this text node is allowed.
2143 ;;; That is, if one of the functions ever returns NIL, the node is
2144 ;;; rejected as invalid.
2146 (defun cmodel-done (actual-value)
2147 (null actual-value))
2149 (defun compile-cspec (cspec &optional standalone-check)
2150 (cond
2151 ((atom cspec)
2152 (ecase cspec
2153 (:EMPTY (values #'cmodel-done (constantly nil)))
2154 (:PCDATA (values #'cmodel-done (constantly t)))
2155 (:ANY
2156 (values (labels ((doit (name) (if name #'doit t))) #'doit)
2157 (constantly t)))))
2158 ((and (eq (car cspec) '*)
2159 (let ((subspec (second cspec)))
2160 (and (eq (car subspec) 'or) (eq (cadr subspec) :PCDATA))))
2161 (values (compile-mixed (second cspec))
2162 (constantly t)))
2164 (values (compile-content-model cspec)
2165 (lambda (rod)
2166 (when standalone-check
2167 (validity-error "(02) Standalone Document Declaration: whitespace"))
2168 (every #'white-space-rune-p rod))))))
2170 (defun compile-mixed (cspec)
2171 ;; das koennten wir theoretisch auch COMPILE-CONTENT-MODEL erledigen lassen
2172 (let ((allowed-names (cddr cspec)))
2173 (labels ((doit (actual-name)
2174 (cond
2175 ((null actual-name) t)
2176 ((member actual-name allowed-names :test #'rod=) #'doit)
2177 (t nil))))
2178 #'doit)))
2180 (defun compile-content-model (cspec &optional (continuation #'cmodel-done))
2181 (if (vectorp cspec)
2182 (lambda (actual-name)
2183 (if (and actual-name (rod= cspec actual-name))
2184 continuation
2185 nil))
2186 (ecase (car cspec)
2187 (and
2188 (labels ((traverse (seq)
2189 (compile-content-model (car seq)
2190 (if (cdr seq)
2191 (traverse (cdr seq))
2192 continuation))))
2193 (traverse (cdr cspec))))
2195 (let ((options (mapcar (rcurry #'compile-content-model continuation)
2196 (cdr cspec))))
2197 (lambda (actual-name)
2198 (some (rcurry #'funcall actual-name) options))))
2200 (let ((maybe (compile-content-model (second cspec) continuation)))
2201 (lambda (actual-name)
2202 (or (funcall maybe actual-name)
2203 (funcall continuation actual-name)))))
2205 (let (maybe-continuation)
2206 (labels ((recurse (actual-name)
2207 (if (null actual-name)
2208 (funcall continuation actual-name)
2209 (or (funcall maybe-continuation actual-name)
2210 (funcall continuation actual-name)))))
2211 (setf maybe-continuation
2212 (compile-content-model (second cspec) #'recurse))
2213 #'recurse)))
2215 (let ((it (cadr cspec)))
2216 (compile-content-model `(and ,it (* ,it)) continuation))))))
2218 (defun setp (list &key (test 'eql))
2219 (equal list (remove-duplicates list :test test)))
2221 (defun legal-content-model-p (cspec &optional validate)
2222 (or (eq cspec :PCDATA)
2223 (eq cspec :ANY)
2224 (eq cspec :EMPTY)
2225 (and (consp cspec)
2226 (eq (car cspec) '*)
2227 (consp (cadr cspec))
2228 (eq (car (cadr cspec)) 'or)
2229 (eq (cadr (cadr cspec)) :PCDATA)
2230 (every #'vectorp (cddr (cadr cspec)))
2231 (if (and validate (not (setp (cddr (cadr cspec)) :test #'rod=)))
2232 (validity-error "VC: No Duplicate Types (07)")
2234 (labels ((walk (x)
2235 (cond ((member x '(:PCDATA :ANY :EMPTY))
2236 nil)
2237 ((atom x) t)
2238 ((and (walk (car x))
2239 (walk (cdr x)))))))
2240 (walk cspec))))
2242 ;; wir fahren besser, wenn wir machen:
2244 ;; cspec ::= 'EMPTY' | 'ANY' | '#PCDATA'
2245 ;; | Name
2246 ;; | cs
2247 ;; cs ::= '(' S? cspec ( S? '|' S? cspec)* S? ')' ('?' | '*' | '+')?
2248 ;; und eine post factum analyse
2250 (defun p/cspec (input &optional recursivep)
2251 (let ((term
2252 (let ((names nil) op-cat op res stream)
2253 (multiple-value-bind (cat sem) (peek-token input)
2254 (cond ((eq cat :nmtoken)
2255 (consume-token input)
2256 (cond ((rod= sem '#.(string-rod "EMPTY"))
2257 :EMPTY)
2258 ((rod= sem '#.(string-rod "ANY"))
2259 :ANY)
2260 ((not recursivep)
2261 (wf-error input "invalid content spec"))
2263 sem)))
2264 ((eq cat :\#PCDATA)
2265 (consume-token input)
2266 :PCDATA)
2267 ((eq cat :\()
2268 (setf stream (car (zstream-input-stack input)))
2269 (consume-token input)
2270 (p/S? input)
2271 (setq names (list (p/cspec input t)))
2272 (p/S? input)
2273 (cond ((member (peek-token input) '(:\| :\,))
2274 (setf op-cat (peek-token input))
2275 (setf op (if (eq op-cat :\,) 'and 'or))
2276 (while (eq (peek-token input) op-cat)
2277 (consume-token input)
2278 (p/S? input)
2279 (push (p/cspec input t) names)
2280 (p/S? input))
2281 (setf res (cons op (reverse names))))
2283 (setf res (cons 'and names))))
2284 (p/S? input)
2285 (expect input :\))
2286 (when *validate*
2287 (unless (eq stream (car (zstream-input-stack input)))
2288 (validity-error "(06) Proper Group/PE Nesting")))
2289 res)
2291 (wf-error input "p/cspec - ~s / ~s" cat sem)))))))
2292 (cond ((eq (peek-token input) :?) (consume-token input) (list '? term))
2293 ((eq (peek-token input) :+) (consume-token input) (list '+ term))
2294 ((eq (peek-token input) :*) (consume-token input) (list '* term))
2296 term))))
2298 (defun normalize-mixed-cspec (cspec)
2299 ;; der Parser oben funktioniert huebsch fuer die children-Regel, aber
2300 ;; fuer Mixed ist das Ergebnis nicht praktisch, denn dort wollen wir
2301 ;; eigentlich auf eine Liste von Namen in einheitlichem Format hinaus.
2302 ;; Dazu normalisieren wir einfach in eine der beiden folgenden Formen:
2303 ;; (* (or :PCDATA ...rods...)) -- und zwar exakt so!
2304 ;; :PCDATA -- sonst ganz trivial
2305 (flet ((trivialp (c)
2306 (and (consp c)
2307 (and (eq (car c) 'and)
2308 (eq (cadr c) :PCDATA)
2309 (null (cddr c))))))
2310 (if (or (trivialp cspec) ;(and PCDATA)
2311 (and (consp cspec) ;(* (and PCDATA))
2312 (and (eq (car cspec) '*)
2313 (null (cddr cspec))
2314 (trivialp (cadr cspec)))))
2315 :PCDATA
2316 cspec)))
2318 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2321 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDefs S? '>'
2322 ;; [52] AttlistDecl ::= '<!ATTLIST' S Name S? '>'
2323 ;; [53] AttDefs ::= S Name S AttType S DefaultDecl AttDefs
2324 ;; [53] AttDefs ::=
2326 (defun p/notation-decl (input)
2327 (let (name id)
2328 (expect input :|<!NOTATION|)
2329 (p/S input)
2330 (setf name (p/name input))
2331 (p/S input)
2332 (setf id (p/external-id input t))
2333 (p/S? input)
2334 (expect input :\>)
2335 (sax:notation-declaration (handler *ctx*)
2336 name
2337 (if (extid-public id)
2338 (normalize-public-id (extid-public id))
2339 nil)
2340 (uri-rod (extid-system id)))
2341 (when (and sax:*namespace-processing* (find #/: name))
2342 (wf-error input "colon in notation name"))
2343 (when *validate*
2344 (define-notation (dtd *ctx*) name id))
2345 (list :notation-decl name id)))
2347 (defun normalize-public-id (rod)
2348 (with-rune-collector (collect)
2349 (let ((gimme-20 nil)
2350 (anything-seen-p nil))
2351 (map nil (lambda (c)
2352 (cond
2353 ((or (rune= c #/u+0009)
2354 (rune= c #/u+000A)
2355 (rune= c #/u+000D)
2356 (rune= c #/u+0020))
2357 (setf gimme-20 t))
2359 (when (and anything-seen-p gimme-20)
2360 (collect #/u+0020))
2361 (setf gimme-20 nil)
2362 (setf anything-seen-p t)
2363 (collect c))))
2364 rod))))
2368 (defun p/conditional-sect (input)
2369 (expect input :<!\[ )
2370 (let ((stream (car (zstream-input-stack input))))
2371 (p/S? input)
2372 (multiple-value-bind (cat sem) (read-token input)
2373 (cond ((and (eq cat :nmtoken)
2374 (rod= sem '#.(string-rod "INCLUDE")))
2375 (p/include-sect input stream))
2376 ((and (eq cat :nmtoken)
2377 (rod= sem '#.(string-rod "IGNORE")))
2378 (p/ignore-sect input stream))
2380 (wf-error input "Expected INCLUDE or IGNORE after \"<![\"."))))))
2382 (defun p/cond-expect (input cat initial-stream)
2383 (expect input cat)
2384 (when *validate*
2385 (unless (eq (car (zstream-input-stack input)) initial-stream)
2386 (validity-error "(21) Proper Conditional Section/PE Nesting"))))
2388 (defun p/include-sect (input initial-stream)
2389 ;; <![INCLUDE is already read.
2390 (p/S? input)
2391 (p/cond-expect input :\[ initial-stream)
2392 (p/ext-subset-decl input)
2393 (p/cond-expect input :\] initial-stream)
2394 (p/cond-expect input :\] initial-stream)
2395 (p/cond-expect input :\> initial-stream))
2397 (defun p/ignore-sect (input initial-stream)
2398 ;; <![IGNORE is already read.
2399 ;; XXX Is VC 21 being checked for nested sections?
2400 (p/S? input)
2401 (p/cond-expect input :\[ initial-stream)
2402 (let ((input (car (zstream-input-stack input))))
2403 (let ((level 0))
2404 (do ((c1 (read-rune input) (read-rune input))
2405 (c2 #/U+0000 c1)
2406 (c3 #/U+0000 c2))
2407 ((= level -1))
2408 (declare (type fixnum level))
2409 (cond ((eq c1 :eof)
2410 (eox input "EOF in <![IGNORE ... >")))
2411 (cond ((and (rune= c3 #/<) (rune= c2 #/!) (rune= c1 #/\[))
2412 (incf level)))
2413 (cond ((and (rune= c3 #/\]) (rune= c2 #/\]) (rune= c1 #/>))
2414 (decf level))) )))
2415 (unless (eq (car (zstream-input-stack input)) initial-stream)
2416 (validity-error "(21) Proper Conditional Section/PE Nesting")))
2418 (defun p/ext-subset-decl (input)
2419 ;; ( markupdecl | conditionalSect | S )*
2420 (loop
2421 (case (let ((*expand-pe-p* nil)) (peek-token input))
2422 (:|<![| (let ((*expand-pe-p* t)) (p/conditional-sect input)))
2423 (:S (consume-token input))
2424 (:eof (return))
2425 ((:|<!ELEMENT| :|<!ATTLIST| :|<!ENTITY| :|<!NOTATION| :PI :COMMENT)
2426 (let ((*expand-pe-p* t)
2427 (*external-subset-p* t))
2428 (p/markup-decl input)))
2429 ((:PE-REFERENCE)
2430 (let ((name (nth-value 1 (read-token input))))
2431 (recurse-on-entity input name :parameter
2432 (lambda (input)
2433 (etypecase (checked-get-entdef name :parameter)
2434 (external-entdef
2435 (p/ext-subset input))
2436 (internal-entdef
2437 (p/ext-subset-decl input)))
2438 (unless (eq :eof (peek-token input))
2439 (wf-error input "Trailing garbage."))))))
2440 (otherwise (return)))) )
2442 (defun p/markup-decl (input)
2443 (peek-token input)
2444 (let ((stream (car (zstream-input-stack input))))
2445 (multiple-value-prog1
2446 (p/markup-decl-unsafe input)
2447 (when *validate*
2448 (unless (eq stream (car (zstream-input-stack input)))
2449 (validity-error "(01) Proper Declaration/PE Nesting"))))))
2451 (defun p/markup-decl-unsafe (input)
2452 ;; markupdecl ::= elementdecl | AttlistDecl /* VC: Proper Declaration/PE Nesting */
2453 ;; | EntityDecl | NotationDecl
2454 ;; | PI | Comment /* WFC: PEs in Internal Subset */
2455 (let ((token (peek-token input))
2456 (*expand-pe-p* (and *expand-pe-p* *external-subset-p*)))
2457 (case token
2458 (:|<!ELEMENT| (p/element-decl input))
2459 (:|<!ATTLIST| (p/attlist-decl input))
2460 (:|<!ENTITY| (p/entity-decl input))
2461 (:|<!NOTATION| (p/notation-decl input))
2462 (:PI
2463 (let ((sem (nth-value 1 (read-token input))))
2464 (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))))
2465 (:COMMENT (consume-token input))
2466 (otherwise
2467 (wf-error input "p/markup-decl ~S" (peek-token input))))))
2469 (defun setup-encoding (input xml-header)
2470 (when (xml-header-encoding xml-header)
2471 (let ((enc (find-encoding (xml-header-encoding xml-header))))
2472 (cond (enc
2473 (setf (xstream-encoding (car (zstream-input-stack input)))
2474 enc))
2476 (warn "There is no such encoding: ~S." (xml-header-encoding xml-header)))))))
2478 (defun set-full-speed (input)
2479 (let ((xstream (car (zstream-input-stack input))))
2480 (when xstream
2481 (set-to-full-speed xstream))))
2483 (defun p/ext-subset (input)
2484 (cond ((eq (peek-token input) :xml-decl)
2485 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
2486 (setup-encoding input hd))
2487 (consume-token input)))
2488 (set-full-speed input)
2489 (p/ext-subset-decl input)
2490 (unless (eq (peek-token input) :eof)
2491 (wf-error input "Trailing garbage - ~S." (peek-token input))))
2493 (defvar *catalog* nil)
2495 (defun extid-using-catalog (extid)
2496 (if *catalog*
2497 (let ((sysid
2498 (resolve-extid (extid-public extid)
2499 (extid-system extid)
2500 *catalog*)))
2501 (if sysid
2502 (make-extid nil sysid)
2503 extid))
2504 extid))
2506 (defun p/doctype-decl (input &optional dtd-extid)
2507 (let ()
2508 (let ((*expand-pe-p* nil)
2509 name extid)
2510 (expect input :|<!DOCTYPE|)
2511 (p/S input)
2512 (setq name (p/nmtoken input))
2513 (when *validate*
2514 (setf (model-stack *ctx*) (list (make-root-model name))))
2515 (when (eq (peek-token input) :S)
2516 (p/S input)
2517 (unless (or (eq (peek-token input) :\[ )
2518 (eq (peek-token input) :\> ))
2519 (setf extid (p/external-id input t))))
2520 (when dtd-extid
2521 (setf extid dtd-extid))
2522 (p/S? input)
2523 (sax:start-dtd (handler *ctx*)
2524 name
2525 (and extid (extid-public extid))
2526 (and extid (uri-rod (extid-system extid))))
2527 (when (eq (peek-token input) :\[ )
2528 (when (disallow-internal-subset *ctx*)
2529 (wf-error input "document includes an internal subset"))
2530 (ensure-dtd)
2531 (consume-token input)
2532 (sax:start-internal-subset (handler *ctx*))
2533 (while (progn (p/S? input)
2534 (not (eq (peek-token input) :\] )))
2535 (if (eq (peek-token input) :PE-REFERENCE)
2536 (let ((name (nth-value 1 (read-token input))))
2537 (recurse-on-entity input name :parameter
2538 (lambda (input)
2539 (etypecase (checked-get-entdef name :parameter)
2540 (external-entdef
2541 (p/ext-subset input))
2542 (internal-entdef
2543 (p/ext-subset-decl input)))
2544 (unless (eq :eof (peek-token input))
2545 (wf-error input "Trailing garbage.")))))
2546 (let ((*expand-pe-p* t))
2547 (p/markup-decl input))))
2548 (consume-token input)
2549 (sax:end-internal-subset (handler *ctx*))
2550 (p/S? input))
2551 (expect input :>)
2552 (when extid
2553 (let* ((effective-extid
2554 (extid-using-catalog (absolute-extid input extid)))
2555 (sysid (extid-system effective-extid))
2556 (fresh-dtd-p (null (dtd *ctx*)))
2557 (cached-dtd
2558 (and fresh-dtd-p
2559 (not (standalone-p *ctx*))
2560 (getdtd sysid *dtd-cache*))))
2561 (cond
2562 (cached-dtd
2563 (setf (dtd *ctx*) cached-dtd)
2564 (report-cached-dtd cached-dtd))
2566 (let ((xi2 (xstream-open-extid effective-extid)))
2567 (with-zstream (zi2 :input-stack (list xi2))
2568 (ensure-dtd)
2569 (p/ext-subset zi2)
2570 (when (and fresh-dtd-p
2571 *cache-all-dtds*
2572 *validate*
2573 (not (standalone-p *ctx*)))
2574 (setf (getdtd sysid *dtd-cache*) (dtd *ctx*)))))))))
2575 (sax:end-dtd (handler *ctx*))
2576 (let ((dtd (dtd *ctx*)))
2577 (sax:entity-resolver
2578 (handler *ctx*)
2579 (lambda (name handler) (resolve-entity name handler dtd)))
2580 (sax::dtd (handler *ctx*) dtd))
2581 (list :DOCTYPE name extid))))
2583 (defun report-cached-dtd (dtd)
2584 (maphash (lambda (k v)
2585 (report-entity (handler *ctx*) :general k (cdr v)))
2586 (dtd-gentities dtd))
2587 (maphash (lambda (k v)
2588 (report-entity (handler *ctx*) :parameter k (cdr v)))
2589 (dtd-pentities dtd))
2590 (maphash (lambda (k v)
2591 (sax:notation-declaration
2592 (handler *ctx*)
2594 (if (extid-public v)
2595 (normalize-public-id (extid-public v))
2596 nil)
2597 (uri-rod (extid-system v))))
2598 (dtd-notations dtd)))
2600 (defun p/misc*-2 (input)
2601 ;; Misc*
2602 (while (member (peek-token input) '(:COMMENT :PI :S))
2603 (case (peek-token input)
2604 (:COMMENT
2605 (sax:comment (handler *ctx*) (nth-value 1 (peek-token input))))
2606 (:PI
2607 (sax:processing-instruction
2608 (handler *ctx*)
2609 (car (nth-value 1 (peek-token input)))
2610 (cdr (nth-value 1 (peek-token input))))))
2611 (consume-token input)))
2613 (defun p/document
2614 (input handler
2615 &key validate dtd root entity-resolver disallow-internal-subset
2616 (recode t))
2617 ;; check types of user-supplied arguments for better error messages:
2618 (check-type validate boolean)
2619 (check-type recode boolean)
2620 (check-type dtd (or null extid))
2621 (check-type root (or null rod))
2622 (check-type entity-resolver (or null function symbol))
2623 (check-type disallow-internal-subset boolean)
2624 #+rune-is-integer
2625 (when recode
2626 (setf handler (make-recoder handler #'rod-to-utf8-string)))
2627 (let* ((xstream (car (zstream-input-stack input)))
2628 (name (xstream-name xstream))
2629 (base (when name (stream-name-uri name)))
2630 (*ctx*
2631 (make-context :handler handler
2632 :main-zstream input
2633 :entity-resolver entity-resolver
2634 :base-stack (list (or base ""))
2635 :disallow-internal-subset disallow-internal-subset))
2636 (*validate* validate)
2637 (*namespace-bindings* *initial-namespace-bindings*))
2638 (sax:register-sax-parser handler (make-instance 'cxml-parser :ctx *ctx*))
2639 (sax:start-document handler)
2640 ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
2641 ;; Misc ::= Comment | PI | S
2642 ;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
2643 ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
2644 (let ((*data-behaviour* :DTD))
2645 ;; optional XMLDecl?
2646 (p/xmldecl input)
2647 ;; Misc*
2648 (p/misc*-2 input)
2649 ;; (doctypedecl Misc*)?
2650 (cond
2651 ((eq (peek-token input) :<!DOCTYPE)
2652 (p/doctype-decl input dtd)
2653 (p/misc*-2 input))
2654 (dtd
2655 (synthesize-doctype dtd input))
2656 ((and validate (not dtd))
2657 (validity-error "invalid document: no doctype")))
2658 (ensure-dtd)
2659 ;; Override expected root element if asked to
2660 (when root
2661 (setf (model-stack *ctx*) (list (make-root-model root))))
2662 ;; element
2663 (let ((*data-behaviour* :DOC))
2664 (fix-seen-< input)
2665 (p/element input))
2666 ;; optional Misc*
2667 (p/misc*-2 input)
2668 (p/eof input)
2669 (sax:end-document handler))))
2671 (defun synthesize-doctype (dtd input)
2672 (let ((dummy (string->xstream "<!DOCTYPE dummy>")))
2673 (setf (xstream-name dummy)
2674 (make-stream-name
2675 :entity-name "dummy doctype"
2676 :entity-kind :main
2677 :uri (zstream-base-sysid input)))
2678 (with-zstream (zstream :input-stack (list dummy))
2679 (p/doctype-decl zstream dtd))))
2681 (defun fix-seen-< (input)
2682 (when (eq (peek-token input) :seen-<)
2683 (multiple-value-bind (c s)
2684 (read-token-after-|<| input (car (zstream-input-stack input)))
2685 (setf (zstream-token-category input) c
2686 (zstream-token-semantic input) s))))
2688 (defun p/xmldecl (input)
2689 ;; we will use the attribute-value parser for the xml decl.
2690 (prog1
2691 (when (eq (peek-token input) :xml-decl)
2692 (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input))))))
2693 (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes))
2694 (setup-encoding input hd)
2695 (read-token input)
2696 hd))
2697 (set-full-speed input)))
2699 (defun p/eof (input)
2700 (unless (eq (peek-token input) :eof)
2701 (wf-error input "Garbage at end of document."))
2702 (when *validate*
2703 (maphash (lambda (k v)
2704 (unless v
2705 (validity-error "(11) IDREF: ~S not defined" (rod-string k))))
2706 (id-table *ctx*))
2708 (dolist (name (referenced-notations *ctx*))
2709 (unless (find-notation name (dtd *ctx*))
2710 (validity-error "(23) Notation Declared: ~S" (rod-string name))))))
2712 (defun p/element (input)
2713 (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
2714 (sax:start-element (handler *ctx*) uri lname qname attrs)
2715 (when (eq cat :stag)
2716 (let ((*namespace-bindings* n-b))
2717 (p/content input))
2718 (p/etag input qname))
2719 (sax:end-element (handler *ctx*) uri lname qname)
2720 (undeclare-namespaces new-b)
2721 (pop (base-stack *ctx*))
2722 (validate-end-element *ctx* qname)))
2724 (defun p/sztag (input)
2725 (multiple-value-bind (cat sem) (read-token input)
2726 (case cat
2727 ((:stag :ztag))
2728 (:eof (eox input))
2729 (t (wf-error input "element expected")))
2730 (destructuring-bind (&optional name &rest raw-attrs) sem
2731 (validate-start-element *ctx* name)
2732 (let* ((attrs
2733 (process-attributes *ctx* name (build-attribute-list raw-attrs)))
2734 (*namespace-bindings* *namespace-bindings*)
2735 new-namespaces)
2736 (when sax:*namespace-processing*
2737 (setf new-namespaces (declare-namespaces attrs))
2738 (mapc #'set-attribute-namespace attrs))
2739 (push (compute-base attrs) (base-stack *ctx*))
2740 (multiple-value-bind (uri prefix local-name)
2741 (if sax:*namespace-processing*
2742 (decode-qname name)
2743 (values nil nil nil))
2744 (declare (ignore prefix))
2745 (check-attribute-uniqueness attrs)
2746 (unless (or sax:*include-xmlns-attributes*
2747 (null sax:*namespace-processing*))
2748 (setf attrs
2749 (remove-if (compose #'xmlns-attr-p #'sax:attribute-qname)
2750 attrs)))
2751 (values cat
2752 *namespace-bindings*
2753 new-namespaces
2754 uri local-name name attrs))))))
2756 (defun p/etag (input qname)
2757 (multiple-value-bind (cat2 sem2) (read-token input)
2758 (unless (and (eq cat2 :etag)
2759 (eq (car sem2) qname))
2760 (wf-error input "Bad nesting. ~S / ~S"
2761 (mu qname)
2762 (mu (cons cat2 sem2))))
2763 (when (cdr sem2)
2764 (wf-error input "no attributes allowed in end tag"))))
2766 ;; copy&paste from cxml-rng
2767 (defun escape-uri (string)
2768 (with-output-to-string (out)
2769 (loop for c across (cxml::rod-to-utf8-string string) do
2770 (let ((code (char-code c)))
2771 ;; http://www.w3.org/TR/xlink/#link-locators
2772 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
2773 (format out "%~2,'0X" code)
2774 (write-char c out))))))
2776 (defun compute-base (attrs)
2777 (let ((new (sax:find-attribute #"xml:base" attrs))
2778 (current (car (base-stack *ctx*))))
2779 (if new
2780 (puri:merge-uris (escape-uri (sax:attribute-value new)) current)
2781 current)))
2783 (defun process-characters (input sem)
2784 (consume-token input)
2785 (when (search #"]]>" sem)
2786 (wf-error input "']]>' not allowed in CharData"))
2787 (validate-characters *ctx* sem))
2789 (defun process-cdata-section (input)
2790 (consume-token input)
2791 (let ((input (car (zstream-input-stack input))))
2792 (unless (and (rune= #/C (read-rune input))
2793 (rune= #/D (read-rune input))
2794 (rune= #/A (read-rune input))
2795 (rune= #/T (read-rune input))
2796 (rune= #/A (read-rune input))
2797 (rune= #/\[ (read-rune input)))
2798 (wf-error input "After '<![', 'CDATA[' is expected."))
2799 (validate-characters *ctx* #"hack") ;anything other than whitespace
2800 (read-cdata-sect input)))
2802 (defun p/content (input)
2803 ;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
2804 (loop
2805 (multiple-value-bind (cat sem) (peek-token input)
2806 (case cat
2807 ((:stag :ztag)
2808 (p/element input))
2809 ((:CDATA)
2810 (process-characters input sem)
2811 (sax:characters (handler *ctx*) sem))
2812 ((:ENTITY-REF)
2813 (let ((name sem))
2814 (consume-token input)
2815 (recurse-on-entity input name :general
2816 (lambda (input)
2817 (prog1
2818 (etypecase (checked-get-entdef name :general)
2819 (internal-entdef (p/content input))
2820 (external-entdef (p/ext-parsed-ent input)))
2821 (unless (eq (peek-token input) :eof)
2822 (wf-error input "Trailing garbage. - ~S"
2823 (peek-token input))))))))
2824 ((:<!\[)
2825 (let ((data (process-cdata-section input)))
2826 (sax:start-cdata (handler *ctx*))
2827 (sax:characters (handler *ctx*) data)
2828 (sax:end-cdata (handler *ctx*))))
2829 ((:PI)
2830 (consume-token input)
2831 (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem)))
2832 ((:COMMENT)
2833 (consume-token input)
2834 (sax:comment (handler *ctx*) sem))
2835 (otherwise
2836 (return))))))
2838 ;; [78] extParsedEnt ::= TextDecl? contentw
2839 ;; [79] extPE ::= TextDecl? extSubsetDecl
2841 (defstruct xml-header
2842 version
2843 encoding
2844 (standalone-p nil))
2846 (defun p/ext-parsed-ent (input)
2847 ;; [78] extParsedEnt ::= '<?xml' VersionInfo? EncodingDecl S? '?>' content
2848 (when (eq (peek-token input) :xml-decl)
2849 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
2850 (setup-encoding input hd))
2851 (consume-token input))
2852 (set-full-speed input)
2853 (p/content input))
2855 (defun parse-xml-decl (content)
2856 (let* ((res (make-xml-header))
2857 (i (make-rod-xstream content)))
2858 (with-zstream (z :input-stack (list i))
2859 (let ((atts (read-attribute-list z i t)))
2860 (unless (eq (peek-rune i) :eof)
2861 (wf-error i "Garbage at end of XMLDecl."))
2862 ;; versioninfo muss da sein
2863 ;; dann ? encodingdecl
2864 ;; dann ? sddecl
2865 ;; dann ende
2866 (unless (eq (caar atts) (intern-name '#.(string-rod "version")))
2867 (wf-error i "XMLDecl needs version."))
2868 (unless (and (>= (length (cdar atts)) 1)
2869 (every (lambda (x)
2870 (or (rune<= #/a x #/z)
2871 (rune<= #/A x #/Z)
2872 (rune<= #/0 x #/9)
2873 (rune= x #/_)
2874 (rune= x #/.)
2875 (rune= x #/:)
2876 (rune= x #/-)))
2877 (cdar atts)))
2878 (wf-error i"Bad XML version number: ~S." (rod-string (cdar atts))))
2879 (setf (xml-header-version res) (rod-string (cdar atts)))
2880 (pop atts)
2881 (when (eq (caar atts) (intern-name '#.(string-rod "encoding")))
2882 (unless (and (>= (length (cdar atts)) 1)
2883 (every (lambda (x)
2884 (or (rune<= #/a x #/z)
2885 (rune<= #/A x #/Z)
2886 (rune<= #/0 x #/9)
2887 (rune= x #/_)
2888 (rune= x #/.)
2889 (rune= x #/-)))
2890 (cdar atts))
2891 ((lambda (x)
2892 (or (rune<= #/a x #/z)
2893 (rune<= #/A x #/Z)))
2894 (aref (cdar atts) 0)))
2895 (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
2896 (setf (xml-header-encoding res) (rod-string (cdar atts)))
2897 (pop atts))
2898 (when (eq (caar atts) (intern-name '#.(string-rod "standalone")))
2899 (unless (or (rod= (cdar atts) '#.(string-rod "yes"))
2900 (rod= (cdar atts) '#.(string-rod "no")))
2901 (wf-error i "XMLDecl's 'standalone' attribute must be exactly \"yes\" or \"no\" and not ~S."
2902 (rod-string (cdar atts))))
2903 (setf (xml-header-standalone-p res)
2904 (if (rod-equal '#.(string-rod "yes") (cdar atts))
2905 :yes
2906 :no))
2907 (pop atts))
2908 (when atts
2909 (wf-error i "Garbage in XMLDecl: ~A" (rod-string content)))
2910 res))))
2912 (defun parse-text-decl (content)
2913 (let* ((res (make-xml-header))
2914 (i (make-rod-xstream content)))
2915 (with-zstream (z :input-stack (list i))
2916 (let ((atts (read-attribute-list z i t)))
2917 (unless (eq (peek-rune i) :eof)
2918 (wf-error i "Garbage at end of TextDecl"))
2919 ;; versioninfo optional
2920 ;; encodingdecl muss da sein
2921 ;; dann ende
2922 (when (eq (caar atts) (intern-name '#.(string-rod "version")))
2923 (unless (and (>= (length (cdar atts)) 1)
2924 (every (lambda (x)
2925 (or (rune<= #/a x #/z)
2926 (rune<= #/A x #/Z)
2927 (rune<= #/0 x #/9)
2928 (rune= x #/_)
2929 (rune= x #/.)
2930 (rune= x #/:)
2931 (rune= x #/-)))
2932 (cdar atts)))
2933 (wf-error i "Bad XML version number: ~S." (rod-string (cdar atts))))
2934 (setf (xml-header-version res) (rod-string (cdar atts)))
2935 (pop atts))
2936 (unless (eq (caar atts) (intern-name '#.(string-rod "encoding")))
2937 (wf-error i "TextDecl needs encoding."))
2938 (unless (and (>= (length (cdar atts)) 1)
2939 (every (lambda (x)
2940 (or (rune<= #/a x #/z)
2941 (rune<= #/A x #/Z)
2942 (rune<= #/0 x #/9)
2943 (rune= x #/_)
2944 (rune= x #/.)
2945 (rune= x #/-)))
2946 (cdar atts))
2947 ((lambda (x)
2948 (or (rune<= #/a x #/z)
2949 (rune<= #/A x #/Z)
2950 (rune<= #/0 x #/9)))
2951 (aref (cdar atts) 0)))
2952 (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
2953 (setf (xml-header-encoding res) (rod-string (cdar atts)))
2954 (pop atts)
2955 (when atts
2956 (wf-error i "Garbage in TextDecl: ~A" (rod-string content)))))
2957 res))
2959 ;;;; ---------------------------------------------------------------------------
2960 ;;;; mu
2961 ;;;;
2963 (defun mu (x)
2964 (cond ((stringp x) x)
2965 ((vectorp x) (rod-string x))
2966 ((consp x)
2967 (cons (mu (car x)) (mu (cdr x))))
2968 (x)))
2970 ;;;; ---------------------------------------------------------------------------
2971 ;;;; User interface ;;;;
2973 #-cxml-system::uri-is-namestring
2974 (defun specific-or (component &optional (alternative nil))
2975 (if (eq component :unspecific)
2976 alternative
2977 component))
2979 (defun string-or (str &optional (alternative nil))
2980 (if (zerop (length str))
2981 alternative
2982 str))
2984 #-cxml-system::uri-is-namestring
2985 (defun make-uri (&rest initargs &key path query &allow-other-keys)
2986 (apply #'make-instance
2987 'puri:uri
2988 :path (and path (escape-path path))
2989 :query (and query (escape-query query))
2990 initargs))
2992 #-cxml-system::uri-is-namestring
2993 (defun escape-path (list)
2994 (puri::render-parsed-path list t))
2996 #-cxml-system::uri-is-namestring
2997 (defun escape-query (pairs)
2998 (flet ((escape (str)
2999 (puri::encode-escaped-encoding str puri::*reserved-characters* t)))
3000 (let ((first t))
3001 (with-output-to-string (s)
3002 (dolist (pair pairs)
3003 (if first
3004 (setf first nil)
3005 (write-char #\& s))
3006 (write-string (escape (car pair)) s)
3007 (write-char #\= s)
3008 (write-string (escape (cdr pair)) s))))))
3010 #-cxml-system::uri-is-namestring
3011 (defun uri-parsed-query (uri)
3012 (flet ((unescape (str)
3013 (puri::decode-escaped-encoding str t puri::*reserved-characters*)))
3014 (let ((str (puri:uri-query uri)))
3015 (cond
3016 (str
3017 (let ((pairs '()))
3018 (dolist (s (split-sequence-if (lambda (x) (eql x #\&)) str))
3019 (destructuring-bind (name value)
3020 (split-sequence-if (lambda (x) (eql x #\=)) s)
3021 (push (cons (unescape name) (unescape value)) pairs)))
3022 (reverse pairs)))
3024 nil)))))
3026 #-cxml-system::uri-is-namestring
3027 (defun query-value (name alist)
3028 (cdr (assoc name alist :test #'equal)))
3030 #-cxml-system::uri-is-namestring
3031 (defun pathname-to-uri (pathname)
3032 (let ((path
3033 (append (pathname-directory pathname)
3034 (list
3035 (if (specific-or (pathname-type pathname))
3036 (concatenate 'string
3037 (pathname-name pathname)
3039 (pathname-type pathname))
3040 (pathname-name pathname))))))
3041 (if (eq (car path) :relative)
3042 (make-uri :path path)
3043 (make-uri :scheme :file
3044 :host (concatenate 'string
3045 (string-or (host-namestring pathname))
3047 (specific-or (pathname-device pathname)))
3048 :path path))))
3050 #+cxml-system::uri-is-namestring
3051 (defun pathname-to-uri (pathname)
3052 (puri:parse-uri (namestring pathname)))
3054 #-cxml-system::uri-is-namestring
3055 (defun parse-name.type (str)
3056 (if str
3057 (let ((i (position #\. str :from-end t)))
3058 (if i
3059 (values (subseq str 0 i) (subseq str (1+ i)))
3060 (values str nil)))
3061 (values nil nil)))
3063 #-cxml-system::uri-is-namestring
3064 (defun uri-to-pathname (uri)
3065 (let ((scheme (puri:uri-scheme uri))
3066 (path (puri:uri-parsed-path uri)))
3067 (unless (member scheme '(nil :file))
3068 (error 'xml-parse-error
3069 :format-control "URI scheme ~S not supported"
3070 :format-arguments (list scheme)))
3071 (if (eq (car path) :relative)
3072 (multiple-value-bind (name type)
3073 (parse-name.type (car (last path)))
3074 (make-pathname :directory (butlast path)
3075 :name name
3076 :type type))
3077 (multiple-value-bind (name type)
3078 (parse-name.type (car (last (cdr path))))
3079 (destructuring-bind (host device)
3080 (split-sequence-if (lambda (x) (eql x #\+))
3081 (or (puri:uri-host uri) "+"))
3082 (make-pathname :host (string-or host)
3083 :device (string-or device)
3084 :directory (cons :absolute (butlast (cdr path)))
3085 :name name
3086 :type type))))))
3087 #+cxml-system::uri-is-namestring
3088 (defun uri-to-pathname (uri)
3089 (let ((pathname (puri:render-uri uri nil)))
3090 (when (equalp (pathname-host pathname) "+")
3091 (setf (slot-value pathname 'lisp::host) "localhost"))
3092 pathname))
3094 (defun parse
3095 (input handler &rest args
3096 &key validate dtd root entity-resolver disallow-internal-subset
3097 recode pathname)
3098 (declare (ignore validate dtd root entity-resolver disallow-internal-subset
3099 recode))
3100 (let ((args
3101 (loop
3102 for (name value) on args by #'cddr
3103 unless (eq name :pathname)
3104 append (list name value))))
3105 (etypecase input
3106 (xstream (apply #'parse-xstream input handler args))
3107 (pathname (apply #'parse-file input handler args))
3108 (rod (apply #'parse-rod input handler args))
3109 (array (apply #'parse-octets input handler args))
3110 (stream
3111 (let ((xstream (make-xstream input :speed 8192)))
3112 (setf (xstream-name xstream)
3113 (make-stream-name
3114 :entity-name "main document"
3115 :entity-kind :main
3116 :uri (if pathname
3117 (pathname-to-uri (merge-pathnames pathname))
3118 (safe-stream-sysid input))))
3119 (apply #'parse-xstream xstream handler args))))))
3121 (defun parse-xstream (xstream handler &rest args)
3122 (let ((*ctx* nil))
3123 (handler-case
3124 (with-zstream (zstream :input-stack (list xstream))
3125 (peek-rune xstream)
3126 (with-scratch-pads ()
3127 (apply #'p/document zstream handler args)))
3128 (runes-encoding:encoding-error (c)
3129 (wf-error xstream "~A" c)))))
3131 (defun parse-file (filename handler &rest args)
3132 (with-open-xfile (input filename)
3133 (setf (xstream-name input)
3134 (make-stream-name
3135 :entity-name "main document"
3136 :entity-kind :main
3137 :uri (pathname-to-uri (merge-pathnames filename))))
3138 (apply #'parse-xstream input handler args)))
3140 (defun resolve-synonym-stream (stream)
3141 (while (typep stream 'synonym-stream)
3142 (setf stream (symbol-value (synonym-stream-symbol stream))))
3143 stream)
3145 (defun safe-stream-sysid (stream)
3146 (if (and (typep (resolve-synonym-stream stream) 'file-stream)
3147 ;; ignore-errors, because sb-bsd-sockets creates instances of
3148 ;; FILE-STREAMs that aren't
3149 (ignore-errors (pathname stream)))
3150 (pathname-to-uri (merge-pathnames (pathname stream)))
3151 nil))
3153 (defun parse-stream (stream handler &rest args)
3154 (let ((xstream
3155 (make-xstream
3156 stream
3157 :name (make-stream-name
3158 :entity-name "main document"
3159 :entity-kind :main
3160 :uri (safe-stream-sysid stream))
3161 :initial-speed 1)))
3162 (apply #'parse-xstream xstream handler args)))
3164 (defun parse-empty-document
3165 (uri qname handler &key public-id system-id entity-resolver (recode t))
3166 (check-type uri (or null rod))
3167 (check-type qname (or null rod))
3168 (check-type public-id (or null rod))
3169 (check-type system-id (or null puri:uri))
3170 (check-type entity-resolver (or null function symbol))
3171 (check-type recode boolean)
3172 #+rune-is-integer
3173 (when recode
3174 (setf handler (make-recoder handler #'rod-to-utf8-string)))
3175 (let ((*ctx*
3176 (make-context :handler handler :entity-resolver entity-resolver))
3177 (*validate* nil)
3178 (extid
3179 (when (or public-id system-id)
3180 (extid-using-catalog (make-extid public-id system-id)))))
3181 (sax:start-document handler)
3182 (when extid
3183 (sax:start-dtd handler
3184 qname
3185 (and public-id)
3186 (and system-id (uri-rod system-id)))
3187 (setf (dtd *ctx*) (getdtd (extid-system extid) *dtd-cache*))
3188 (unless (dtd *ctx*)
3189 (with-scratch-pads ()
3190 (let ((*data-behaviour* :DTD))
3191 (let ((xi2 (xstream-open-extid extid)))
3192 (with-zstream (zi2 :input-stack (list xi2))
3193 (ensure-dtd)
3194 (p/ext-subset zi2))))))
3195 (sax:end-dtd handler)
3196 (let ((dtd (dtd *ctx*)))
3197 (sax:entity-resolver handler (lambda (n h) (resolve-entity n h dtd)))
3198 (sax::dtd handler dtd)))
3199 (ensure-dtd)
3200 (when (or uri qname)
3201 (let* ((attrs
3202 (when uri
3203 (list (sax:make-attribute :qname #"xmlns"
3204 :value (rod uri)
3205 :specified-p t))))
3206 (*namespace-bindings* *namespace-bindings*)
3207 new-namespaces)
3208 (when sax:*namespace-processing*
3209 (setf new-namespaces (declare-namespaces attrs))
3210 (mapc #'set-attribute-namespace attrs))
3211 (multiple-value-bind (uri prefix local-name)
3212 (if sax:*namespace-processing* (decode-qname qname) nil)
3213 (declare (ignore prefix))
3214 (unless (or sax:*include-xmlns-attributes*
3215 (null sax:*namespace-processing*))
3216 (setf attrs nil))
3217 (sax:start-element (handler *ctx*) uri local-name qname attrs)
3218 (sax:end-element (handler *ctx*) uri local-name qname))
3219 (undeclare-namespaces new-namespaces)))
3220 (sax:end-document handler)))
3222 (defun parse-dtd-file (filename &optional handler)
3223 (with-open-file (s filename :element-type '(unsigned-byte 8))
3224 (parse-dtd-stream s handler)))
3226 (defun parse-dtd-stream (stream &optional handler)
3227 (let ((input (make-xstream stream)))
3228 (setf (xstream-name input)
3229 (make-stream-name
3230 :entity-name "dtd"
3231 :entity-kind :main
3232 :uri (safe-stream-sysid stream)))
3233 (let ((*ctx* (make-context :handler handler))
3234 (*validate* t)
3235 (*data-behaviour* :DTD))
3236 (with-zstream (zstream :input-stack (list input))
3237 (with-scratch-pads ()
3238 (ensure-dtd)
3239 (peek-rune input)
3240 (p/ext-subset zstream)
3241 (dtd *ctx*))))))
3243 (defun parse-rod (string handler &rest args)
3244 (let ((xstream (string->xstream string)))
3245 (setf (xstream-name xstream)
3246 (make-stream-name
3247 :entity-name "main document"
3248 :entity-kind :main
3249 :uri nil))
3250 (apply #'parse-xstream xstream handler args)))
3252 (defun string->xstream (string)
3253 (make-rod-xstream (string-rod string)))
3255 (defun parse-octets (octets handler &rest args)
3256 (apply #'parse-stream (make-octet-input-stream octets) handler args))
3258 ;;;;
3260 (defun zstream-push (new-xstream zstream)
3261 (cond ((find-if (lambda (x)
3262 (and (xstream-p x)
3263 (eql (stream-name-entity-name (xstream-name x))
3264 (stream-name-entity-name (xstream-name new-xstream)))
3265 (eql (stream-name-entity-kind (xstream-name x))
3266 (stream-name-entity-kind (xstream-name new-xstream)))))
3267 (zstream-input-stack zstream))
3268 (wf-error zstream "Infinite recursion.")))
3269 (push new-xstream (zstream-input-stack zstream))
3270 zstream)
3272 (defun recurse-on-entity (zstream name kind continuation &optional internalp)
3273 (assert (not (zstream-token-category zstream)))
3274 (call-with-entity-expansion-as-stream
3275 zstream
3276 (lambda (new-xstream)
3277 (push :stop (zstream-input-stack zstream))
3278 (zstream-push new-xstream zstream)
3279 (prog1
3280 (funcall continuation zstream)
3281 (assert (eq (peek-token zstream) :eof))
3282 (assert (eq (pop (zstream-input-stack zstream)) new-xstream))
3283 (close-xstream new-xstream)
3284 (assert (eq (pop (zstream-input-stack zstream)) :stop))
3285 (setf (zstream-token-category zstream) nil)
3286 '(consume-token zstream)) )
3287 name
3288 kind
3289 internalp))
3292 (defmacro read-data-until* ((predicate input res res-start res-end) &body body)
3293 ;; fast variant -- for now disabled for no apparent reason
3294 ;; -> res, res-start, res-end
3295 `(let* ((rptr (xstream-read-ptr ,input))
3296 (p0 rptr)
3297 (fptr (xstream-fill-ptr ,input))
3298 (buf (xstream-buffer ,input))
3299 ,res ,res-start ,res-end)
3300 (declare (type fixnum rptr fptr p0)
3301 (type (simple-array read-element (*)) buf))
3302 (loop
3303 (cond ((%= rptr fptr)
3304 ;; underflow -- hmm inject the scratch-pad with what we
3305 ;; read and continue, while using read-rune and collecting
3306 ;; d.h. besser waere hier auch while-reading zu benutzen.
3307 (setf (xstream-read-ptr ,input) rptr)
3308 (multiple-value-setq (,res ,res-start ,res-end)
3309 (with-rune-collector/raw (collect)
3310 (do ((i p0 (%+ i 1)))
3311 ((%= i rptr))
3312 (collect (%rune buf i)))
3313 (let (c)
3314 (loop
3315 (cond ((%= rptr fptr)
3316 (setf (xstream-read-ptr ,input) rptr)
3317 (setf c (peek-rune input))
3318 (cond ((eq c :eof)
3319 (return)))
3320 (setf rptr (xstream-read-ptr ,input)
3321 fptr (xstream-fill-ptr ,input)
3322 buf (xstream-buffer ,input)))
3324 (setf c (%rune buf rptr))))
3325 (cond ((,predicate c)
3326 ;; we stop
3327 (setf (xstream-read-ptr ,input) rptr)
3328 (return))
3330 ;; we continue
3331 (collect c)
3332 (setf rptr (%+ rptr 1))) )))))
3333 (return))
3334 ((,predicate (%rune buf rptr))
3335 ;; we stop
3336 (setf (xstream-read-ptr ,input) rptr)
3337 (setf ,res buf ,res-start p0 ,res-end rptr)
3338 (return) )
3340 we continue
3341 (sf rptr (%+ rptr 1))) ))
3342 ,@body ))
3345 (defmacro read-data-until* ((predicate input res res-start res-end) &body body)
3346 "Read data from `input' until `predicate' applied to the read char
3347 turns true. Then execute `body' with `res', `res-start', `res-end'
3348 bound to denote a subsequence (of RUNEs) containing the read portion.
3349 The rune upon which `predicate' turned true is neither consumed from
3350 the stream, nor included in `res'.
3352 Keep the predicate short, this it may be included more than once into
3353 the macro's expansion."
3355 (let ((input-var (gensym))
3356 (collect (gensym))
3357 (c (gensym)))
3358 `(LET ((,input-var ,input))
3359 (MULTIPLE-VALUE-BIND (,res ,res-start ,res-end)
3360 (WITH-RUNE-COLLECTOR/RAW (,collect)
3361 (LOOP
3362 (LET ((,c (PEEK-RUNE ,input-var)))
3363 (COND ((EQ ,c :EOF)
3364 ;; xxx error message
3365 (RETURN))
3366 ((FUNCALL ,predicate ,c)
3367 (RETURN))
3369 (,collect ,c)
3370 (CONSUME-RUNE ,input-var))))))
3371 (LOCALLY
3372 ,@body)))))
3374 (defun read-name-token (input)
3375 (read-data-until* ((lambda (rune)
3376 (declare (type rune rune))
3377 (not (name-rune-p rune)))
3378 input
3379 r rs re)
3380 (intern-name r rs re)))
3382 (defun read-cdata (input)
3383 (read-data-until* ((lambda (rune)
3384 (declare (type rune rune))
3385 (when (and (%rune< rune #/U+0020)
3386 (not (or (%rune= rune #/U+0009)
3387 (%rune= rune #/U+000a)
3388 (%rune= rune #/U+000d))))
3389 (wf-error input "code point invalid: ~A" rune))
3390 (or (%rune= rune #/<) (%rune= rune #/&)))
3391 input
3392 source start end)
3393 (locally
3394 (declare (type (simple-array rune (*)) source)
3395 (type ufixnum start)
3396 (type ufixnum end)
3397 (optimize (speed 3) (safety 0)))
3398 (let ((res (make-array (%- end start) :element-type 'rune)))
3399 (declare (type (simple-array rune (*)) res))
3400 (let ((i (%- end start)))
3401 (declare (type ufixnum i))
3402 (loop
3403 (setf i (- i 1))
3404 (setf (%rune res i) (%rune source (the ufixnum (+ i start))))
3405 (when (= i 0)
3406 (return))))
3407 res))))
3409 ;; used only by read-att-value-2
3410 (defun internal-entity-expansion (name)
3411 (let ((def (get-entity-definition name :general (dtd *ctx*))))
3412 (unless def
3413 (wf-error nil "Entity '~A' is not defined." (rod-string name)))
3414 (unless (typep def 'internal-entdef)
3415 (wf-error nil "Entity '~A' is not an internal entity." name))
3416 (or (entdef-expansion def)
3417 (setf (entdef-expansion def) (find-internal-entity-expansion name)))))
3419 ;; used only by read-att-value-2
3420 (defun find-internal-entity-expansion (name)
3421 (with-zstream (zinput)
3422 (with-rune-collector-3 (collect)
3423 (labels ((muffle (input)
3424 (let (c)
3425 (loop
3426 (setf c (read-rune input))
3427 (cond ((eq c :eof)
3428 (return))
3429 ((rune= c #/&)
3430 (setf c (peek-rune input))
3431 (cond ((eql c :eof)
3432 (eox input))
3433 ((rune= c #/#)
3434 (let ((c (read-character-reference input)))
3435 (%put-unicode-char c collect)))
3437 (unless (name-start-rune-p c)
3438 (wf-error zinput "Expecting name after &."))
3439 (let ((name (read-name-token input)))
3440 (setf c (read-rune input))
3441 (check-rune input c #/\;)
3442 (recurse-on-entity
3443 zinput name :general
3444 (lambda (zinput)
3445 (muffle (car (zstream-input-stack zinput)))))))))
3446 ((rune= c #/<)
3447 (wf-error zinput "unexpected #\/<"))
3448 ((space-rune-p c)
3449 (collect #/space))
3450 ((not (data-rune-p c))
3451 (wf-error zinput "illegal char: ~S." c))
3453 (collect c)))))))
3454 (declare (dynamic-extent #'muffle))
3455 (recurse-on-entity
3456 zinput name :general
3457 (lambda (zinput)
3458 (muffle (car (zstream-input-stack zinput)))))))))
3460 ;; callback for DOM
3461 (defun resolve-entity (name handler dtd)
3462 (let ((*validate* nil))
3463 (if (get-entity-definition name :general dtd)
3464 (let* ((*ctx* (make-context :handler handler :dtd dtd))
3465 (*data-behaviour* :DOC))
3466 (with-zstream (input)
3467 (with-scratch-pads ()
3468 (recurse-on-entity
3469 input name :general
3470 (lambda (input)
3471 (prog1
3472 (etypecase (checked-get-entdef name :general)
3473 (internal-entdef (p/content input))
3474 (external-entdef (p/ext-parsed-ent input)))
3475 (unless (eq (peek-token input) :eof)
3476 (wf-error input "Trailing garbage. - ~S"
3477 (peek-token input)))))))))
3478 nil)))
3480 (defun read-att-value-2 (input)
3481 (let ((delim (read-rune input)))
3482 (when (eql delim :eof)
3483 (eox input))
3484 (unless (member delim '(#/\" #/\') :test #'eql)
3485 (wf-error input
3486 "Bad attribute value delimiter ~S, must be either #\\\" or #\\\'."
3487 (rune-char delim)))
3488 (with-rune-collector-4 (collect)
3489 (loop
3490 (let ((c (read-rune input)))
3491 (cond ((eq c :eof)
3492 (eox input "EOF"))
3493 ((rune= c delim)
3494 (return))
3495 ((rune= c #/<)
3496 (wf-error input "'<' not allowed in attribute values"))
3497 ((rune= #/& c)
3498 (multiple-value-bind (kind sem) (read-entity-like input)
3499 (ecase kind
3500 (:CHARACTER-REFERENCE
3501 (%put-unicode-char sem collect))
3502 (:ENTITY-REFERENCE
3503 (let* ((exp (internal-entity-expansion sem))
3504 (n (length exp)))
3505 (declare (type (simple-array rune (*)) exp))
3506 (do ((i 0 (%+ i 1)))
3507 ((%= i n))
3508 (collect (%rune exp i))))))))
3509 ((space-rune-p c)
3510 (collect #/u+0020))
3512 (collect c))))))))
3514 ;;;;;;;;;;;;;;;;;
3516 ;;; Namespace stuff
3518 ;; We already know that name is part of a valid XML name, so all we
3519 ;; have to check is that the first rune is a name-start-rune and that
3520 ;; there is not colon in it.
3521 (defun nc-name-p (name)
3522 (and (plusp (length name))
3523 (name-start-rune-p (rune name 0))
3524 (notany #'(lambda (rune) (rune= #/: rune)) name)))
3526 (defun split-qname (qname)
3527 (declare (type runes:simple-rod qname))
3528 (let ((pos (position #/: qname)))
3529 (if pos
3530 (let ((prefix (subseq qname 0 pos))
3531 (local-name (subseq qname (1+ pos))))
3532 (when (zerop pos)
3533 (wf-error nil "empty namespace prefix"))
3534 (if (nc-name-p local-name)
3535 (values prefix local-name)
3536 (wf-error nil "~S is not a valid NcName."
3537 (rod-string local-name))))
3538 (values () qname))))
3540 (defun decode-qname (qname)
3541 "decode-qname name => namespace-uri, prefix, local-name"
3542 (declare (type runes:simple-rod qname))
3543 (multiple-value-bind (prefix local-name) (split-qname qname)
3544 (let ((uri (find-namespace-binding prefix)))
3545 (if uri
3546 (values uri prefix local-name)
3547 (values nil nil qname)))))
3550 (defun find-namespace-binding (prefix)
3551 (cdr (or (assoc (or prefix #"") *namespace-bindings* :test #'rod=)
3552 (wf-error nil "Undeclared namespace prefix: ~A" (rod-string prefix)))))
3554 ;; FIXME: Should probably be refactored by adding :start and :end to rod=/rod-equal
3555 (defun rod-starts-with (prefix rod)
3556 (and (<= (length prefix) (length rod))
3557 (dotimes (i (length prefix) t)
3558 (unless (rune= (rune prefix i) (rune rod i))
3559 (return nil)))))
3561 (defun xmlns-attr-p (attr-name)
3562 (rod-starts-with #.(string-rod "xmlns") attr-name))
3564 (defun attrname->prefix (attrname)
3565 (if (< 5 (length attrname))
3566 (subseq attrname 6)
3567 nil))
3569 (defun find-namespace-declarations (attributes)
3570 (loop
3571 for attribute in attributes
3572 for qname = (sax:attribute-qname attribute)
3573 when (xmlns-attr-p qname)
3574 collect (cons (attrname->prefix qname) (sax:attribute-value attribute))))
3576 (defun declare-namespaces (attributes)
3577 (let ((ns-decls (find-namespace-declarations attributes)))
3578 (dolist (ns-decl ns-decls)
3579 ;; check some namespace validity constraints
3580 (let ((prefix (car ns-decl))
3581 (uri (cdr ns-decl)))
3582 (cond
3583 ((and (rod= prefix #"xml")
3584 (not (rod= uri #"http://www.w3.org/XML/1998/namespace")))
3585 (wf-error nil
3586 "Attempt to rebind the prefix \"xml\" to ~S." (mu uri)))
3587 ((and (rod= uri #"http://www.w3.org/XML/1998/namespace")
3588 (not (rod= prefix #"xml")))
3589 (wf-error nil
3590 "The namespace ~
3591 URI \"http://www.w3.org/XML/1998/namespace\" may not ~
3592 be bound to the prefix ~S, only \"xml\" is legal."
3593 (mu prefix)))
3594 ((and (rod= prefix #"xmlns")
3595 (rod= uri #"http://www.w3.org/2000/xmlns/"))
3596 (wf-error nil
3597 "Attempt to bind the prefix \"xmlns\" to its predefined ~
3598 URI \"http://www.w3.org/2000/xmlns/\", which is ~
3599 forbidden for no good reason."))
3600 ((rod= prefix #"xmlns")
3601 (wf-error nil
3602 "Attempt to bind the prefix \"xmlns\" to the URI ~S, ~
3603 but it may not be declared." (mu uri)))
3604 ((rod= uri #"http://www.w3.org/2000/xmlns/")
3605 (wf-error nil
3606 "The namespace URI \"http://www.w3.org/2000/xmlns/\" may ~
3607 not be bound to prefix ~S (or any other)." (mu prefix)))
3608 ((and (rod= uri #"") prefix)
3609 (wf-error nil
3610 "Only the default namespace (the one without a prefix) ~
3611 may be bound to an empty namespace URI, thus ~
3612 undeclaring it."))
3614 (push (cons prefix (if (rod= #"" uri) nil uri))
3615 *namespace-bindings*)
3616 (sax:start-prefix-mapping (handler *ctx*)
3617 (car ns-decl)
3618 (cdr ns-decl))))))
3619 ns-decls))
3621 (defun undeclare-namespaces (ns-decls)
3622 (dolist (ns-decl ns-decls)
3623 (sax:end-prefix-mapping (handler *ctx*) (car ns-decl))))
3625 (defun build-attribute-list (attr-alist)
3626 ;; fixme: if there is a reason this function reverses attribute order,
3627 ;; it should be documented.
3628 (let (attributes)
3629 (dolist (pair attr-alist)
3630 (push (sax:make-attribute :qname (car pair)
3631 :value (cdr pair)
3632 :specified-p t)
3633 attributes))
3634 attributes))
3636 (defun check-attribute-uniqueness (attributes)
3637 ;; 5.3 Uniqueness of Attributes
3638 ;; In XML documents conforming to [the xmlns] specification, no
3639 ;; tag may contain two attributes which:
3640 ;; 1. have identical names, or
3641 ;; 2. have qualified names with the same local part and with
3642 ;; prefixes which have been bound to namespace names that are
3643 ;; identical.
3645 ;; 1. is checked by read-tag-2, so we only deal with 2 here
3646 (loop for (attr-1 . rest) on attributes do
3647 (when (and (sax:attribute-namespace-uri attr-1)
3648 (find-if (lambda (attr-2)
3649 (and (rod= (sax:attribute-namespace-uri attr-1)
3650 (sax:attribute-namespace-uri attr-2))
3651 (rod= (sax:attribute-local-name attr-1)
3652 (sax:attribute-local-name attr-2))))
3653 rest))
3654 (wf-error nil
3655 "Multiple definitions of attribute ~S in namespace ~S."
3656 (mu (sax:attribute-local-name attr-1))
3657 (mu (sax:attribute-namespace-uri attr-1))))))
3659 (defun set-attribute-namespace (attribute)
3660 (let ((qname (sax:attribute-qname attribute)))
3661 (if (and sax:*use-xmlns-namespace* (rod= qname #"xmlns"))
3662 (setf (sax:attribute-namespace-uri attribute)
3663 #"http://www.w3.org/2000/xmlns/")
3664 (multiple-value-bind (prefix local-name) (split-qname qname)
3665 (when (and prefix ;; default namespace doesn't apply to attributes
3666 (or (not (rod= #"xmlns" prefix))
3667 sax:*use-xmlns-namespace*))
3668 (setf (sax:attribute-namespace-uri attribute)
3669 (decode-qname qname)))
3670 (setf (sax:attribute-local-name attribute) local-name)))))
3672 ;;;;;;;;;;;;;;;;;
3674 ;; System Identifier Protocol
3676 ;; A system identifier is an object obeying to the system identifier
3677 ;; protocol. Often something like an URL or a pathname.
3679 ;; OPEN-SYS-ID sys-id [generic function]
3681 ;; Opens the resource associated with the system identifier `sys-id'
3682 ;; for reading and returns a stream. For now it is expected, that the
3683 ;; stream is an octet stream (one of element type (unsigned-byte 8)).
3685 ;; More precisely: The returned object only has to obey to the xstream
3686 ;; controller protocol. (That is it has to provide implementations for
3687 ;; READ-OCTETS and XSTREAM-CONTROLLER-CLOSE).
3689 ;; MERGE-SYS-ID sys-id base [generic function]
3691 ;; Merges two system identifiers. That is resolve `sys-id' relative to
3692 ;; `base' yielding an absolute system identifier suitable for
3693 ;; OPEN-SYS-ID.
3696 ;;;;;;;;;;;;;;;;;
3697 ;;; SAX validation handler
3699 (defclass validator ()
3700 ((context :initarg :context :accessor context)
3701 (cdatap :initform nil :accessor cdatap)))
3703 (defun make-validator (dtd root)
3704 (make-instance 'validator
3705 :context (make-context
3706 :handler nil
3707 :dtd dtd
3708 :model-stack (list (make-root-model root)))))
3710 (macrolet ((with-context ((validator) &body body)
3711 `(let ((*ctx* (context ,validator))
3712 (*validate* t))
3713 (with-scratch-pads () ;nicht schoen
3714 ,@body))))
3715 (defmethod sax:start-element ((handler validator) uri lname qname attributes)
3716 uri lname
3717 (with-context (handler)
3718 (validate-start-element *ctx* qname)
3719 (process-attributes *ctx* qname attributes)))
3721 (defmethod sax:start-cdata ((handler validator))
3722 (setf (cdatap handler) t))
3724 (defmethod sax:characters ((handler validator) data)
3725 (with-context (handler)
3726 (validate-characters *ctx* (if (cdatap handler) #"hack" data))))
3728 (defmethod sax:end-cdata ((handler validator))
3729 (setf (cdatap handler) nil))
3731 (defmethod sax:end-element ((handler validator) uri lname qname)
3732 uri lname
3733 (with-context (handler)
3734 (validate-end-element *ctx* qname))))