tests fuer validierung
[cxml-rng.git] / parse.lisp
blobdfe50c3ac656d2bb3b4a185df07256da877737be
1 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 (in-package :cxml-rng)
29 #+sbcl
30 (declaim (optimize (debug 2)))
33 ;;;; Errors
35 (define-condition rng-error (simple-error) ())
37 (defun rng-error (source fmt &rest args)
38 (let ((s (make-string-output-stream)))
39 (apply #'format s fmt args)
40 (when source
41 (etypecase source
42 (klacks:source
43 (format s "~& [ Error at line ~D, column ~D in ~S ]"
44 (klacks:current-line-number source)
45 (klacks:current-column-number source)
46 (klacks:current-system-id source)))
47 (sax:sax-parser-mixin
48 (format s "~& [ Error at line ~D, column ~D in ~S ]"
49 (sax:line-number source)
50 (sax:column-number source)
51 (sax:system-id source))) ))
52 (error 'rng-error
53 :format-control "~A"
54 :format-arguments (list (get-output-stream-string s)))))
57 ;;;; Parser
59 (defvar *datatype-library*)
60 (defvar *namespace-uri*)
61 (defvar *entity-resolver*)
62 (defvar *external-href-stack*)
63 (defvar *include-uri-stack*)
64 (defvar *include-body-p* nil)
65 (defvar *grammar*)
67 (defvar *debug* nil)
69 (defun invoke-with-klacks-handler (fn source)
70 (if *debug*
71 (funcall fn)
72 (handler-case
73 (funcall fn)
74 (cxml:xml-parse-error (c)
75 (rng-error source "Cannot parse schema: ~A" c)))))
77 (defun parse-relax-ng (input &key entity-resolver)
78 (klacks:with-open-source (source (cxml:make-source input))
79 (invoke-with-klacks-handler
80 (lambda ()
81 (klacks:find-event source :start-element)
82 (let* ((*datatype-library* "")
83 (*namespace-uri* "")
84 (*entity-resolver* entity-resolver)
85 (*external-href-stack* '())
86 (*include-uri-stack* '())
87 (*grammar* (make-grammar nil))
88 (result (p/pattern source)))
89 (unless result
90 (rng-error nil "empty grammar"))
91 (setf (grammar-start *grammar*)
92 (make-definition :name :start :child result))
93 (check-pattern-definitions source *grammar*)
94 (check-recursion result 0)
95 (setf result (fold-not-allowed result))
96 (setf result (fold-empty result))
97 result))
98 source)))
101 ;;;; pattern structures
103 (defstruct pattern)
105 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
106 child)
108 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
109 name)
110 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-")))
111 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-")))
113 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
114 a b)
115 (defstruct (group
116 (:include %combination)
117 (:constructor make-group (a b))))
118 (defstruct (interleave
119 (:include %combination)
120 (:constructor make-interleave (a b))))
121 (defstruct (choice
122 (:include %combination)
123 (:constructor make-choice (a b))))
124 (defstruct (after
125 (:include %combination)
126 (:constructor make-after (a b))))
128 (defstruct (one-or-more
129 (:include %parent)
130 (:constructor make-one-or-more (child))))
131 (defstruct (list-pattern
132 (:include %parent)
133 (:constructor make-list-pattern (child))))
135 (defstruct (ref
136 (:include pattern)
137 (:conc-name "PATTERN-")
138 (:constructor make-ref (target)))
139 crdepth
140 target)
142 (defstruct (%leaf (:include pattern)))
144 (defstruct (empty (:include %leaf) (:conc-name "PATTERN-")))
145 (defstruct (text (:include %leaf) (:conc-name "PATTERN-")))
147 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
148 datatype-library
149 type)
151 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
153 string)
155 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
156 params
157 except)
159 (defstruct (not-allowed (:include %leaf) (:conc-name "PATTERN-")))
162 ;;;; non-pattern
164 (defstruct (grammar (:constructor make-grammar (parent)))
165 (start nil)
166 parent
167 (definitions (make-hash-table :test 'equal)))
169 (defstruct param
170 name
171 string)
173 ;; Clark calls this structure "RefPattern"
174 (defstruct (definition (:conc-name "DEFN-"))
175 name
176 combine-method
177 head-p
178 redefinition
179 child)
182 ;;; name-class
184 (defun missing ()
185 (error "missing arg"))
187 (defstruct name-class)
189 (defstruct (any-name (:include name-class)
190 (:constructor make-any-name (except)))
191 (except (missing) :type (or null name-class)))
193 (defstruct (name (:include name-class)
194 (:constructor make-name (uri lname)))
195 (uri (missing) :type string)
196 (lname (missing) :type string))
198 (defstruct (ns-name (:include name-class)
199 (:constructor make-ns-name (uri except)))
200 (uri (missing) :type string)
201 (except (missing) :type (or null name-class)))
203 (defstruct (name-class-choice (:include name-class)
204 (:constructor make-name-class-choice (a b)))
205 (a (missing) :type name-class)
206 (b (missing) :type name-class))
208 (defun simplify-nc-choice (values)
209 (zip #'make-name-class-choice values))
212 ;;;; parser
214 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
216 (defun skip-foreign* (source)
217 (loop
218 (case (klacks:peek-next source)
219 (:start-element (skip-foreign source))
220 (:end-element (return)))))
222 (defun skip-to-native (source)
223 (loop
224 (case (klacks:peek source)
225 (:start-element
226 (when (equal (klacks:current-uri source) *rng-namespace*)
227 (return))
228 (klacks:serialize-element source nil))
229 (:end-element (return)))
230 (klacks:consume source)))
232 (defun consume-and-skip-to-native (source)
233 (klacks:consume source)
234 (skip-to-native source))
236 (defun skip-foreign (source)
237 (when (equal (klacks:current-uri source) *rng-namespace*)
238 (rng-error source
239 "invalid schema: ~A not allowed here"
240 (klacks:current-lname source)))
241 (klacks:serialize-element source nil))
243 (defun attribute (lname attrs)
244 (let ((a (sax:find-attribute-ns "" lname attrs)))
245 (if a
246 (sax:attribute-value a)
247 nil)))
249 (defparameter *whitespace*
250 (format nil "~C~C~C~C"
251 (code-char 9)
252 (code-char 32)
253 (code-char 13)
254 (code-char 10)))
256 (defun ntc (lname source-or-attrs)
257 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
258 (let* ((attrs
259 (if (listp source-or-attrs)
260 source-or-attrs
261 (klacks:list-attributes source-or-attrs)))
262 (a (sax:find-attribute-ns "" lname attrs)))
263 (if a
264 (string-trim *whitespace* (sax:attribute-value a))
265 nil)))
267 (defmacro with-library-and-ns (attrs &body body)
268 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
270 (defun invoke-with-library-and-ns (fn attrs)
271 (let* ((dl (attribute "datatypeLibrary" attrs))
272 (ns (attribute "ns" attrs))
273 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
274 (*namespace-uri* (or ns *namespace-uri*)))
275 (funcall fn)))
277 (defun p/pattern (source)
278 (let* ((lname (klacks:current-lname source))
279 (attrs (klacks:list-attributes source)))
280 (with-library-and-ns attrs
281 (case (find-symbol lname :keyword)
282 (:|element| (p/element source (ntc "name" attrs)))
283 (:|attribute| (p/attribute source (ntc "name" attrs)))
284 (:|group| (p/combination #'groupify source))
285 (:|interleave| (p/combination #'interleave-ify source))
286 (:|choice| (p/combination #'choice-ify source))
287 (:|optional| (p/optional source))
288 (:|zeroOrMore| (p/zero-or-more source))
289 (:|oneOrMore| (p/one-or-more source))
290 (:|list| (p/list source))
291 (:|mixed| (p/mixed source))
292 (:|ref| (p/ref source))
293 (:|parentRef| (p/parent-ref source))
294 (:|empty| (p/empty source))
295 (:|text| (p/text source))
296 (:|value| (p/value source))
297 (:|data| (p/data source))
298 (:|notAllowed| (p/not-allowed source))
299 (:|externalRef| (p/external-ref source))
300 (:|grammar| (p/grammar source))
301 (t (skip-foreign source))))))
303 (defun p/pattern+ (source)
304 (let ((children nil))
305 (loop
306 (case (klacks:peek source)
307 (:start-element
308 (let ((p (p/pattern source))) (when p (push p children))))
309 (:end-element
310 (return))
312 (klacks:consume source))))
313 (unless children
314 (rng-error source "empty element"))
315 (nreverse children)))
317 (defun p/pattern? (source)
318 (let ((result nil))
319 (loop
320 (skip-to-native source)
321 (case (klacks:peek source)
322 (:start-element
323 (when result
324 (rng-error source "at most one pattern expected here"))
325 (setf result (p/pattern source)))
326 (:end-element
327 (return))
329 (klacks:consume source))))
330 result))
332 (defun p/element (source name)
333 (klacks:expecting-element (source "element")
334 (let ((result (make-element)))
335 (consume-and-skip-to-native source)
336 (if name
337 (setf (pattern-name result) (destructure-name source name))
338 (setf (pattern-name result) (p/name-class source)))
339 (skip-to-native source)
340 (setf (pattern-child result) (groupify (p/pattern+ source)))
341 result)))
343 (defvar *attribute-namespace-p* nil)
345 (defun p/attribute (source name)
346 (klacks:expecting-element (source "attribute")
347 (let ((result (make-attribute)))
348 (consume-and-skip-to-native source)
349 (if name
350 (setf (pattern-name result)
351 (let ((*namespace-uri* ""))
352 (destructure-name source name)))
353 (setf (pattern-name result)
354 (let ((*attribute-namespace-p* t))
355 (p/name-class source))))
356 (skip-to-native source)
357 (setf (pattern-child result)
358 (or (p/pattern? source) (make-text)))
359 result)))
361 (defun p/combination (zipper source)
362 (klacks:expecting-element (source)
363 (consume-and-skip-to-native source)
364 (funcall zipper (p/pattern+ source))))
366 (defun p/one-or-more (source)
367 (klacks:expecting-element (source "oneOrMore")
368 (consume-and-skip-to-native source)
369 (let ((children (p/pattern+ source)))
370 (make-one-or-more (groupify children)))))
372 (defun p/zero-or-more (source)
373 (klacks:expecting-element (source "zeroOrMore")
374 (consume-and-skip-to-native source)
375 (let ((children (p/pattern+ source)))
376 (make-choice (make-one-or-more (groupify children))
377 (make-empty)))))
379 (defun p/optional (source)
380 (klacks:expecting-element (source "optional")
381 (consume-and-skip-to-native source)
382 (let ((children (p/pattern+ source)))
383 (make-choice (groupify children) (make-empty)))))
385 (defun p/list (source)
386 (klacks:expecting-element (source "list")
387 (consume-and-skip-to-native source)
388 (let ((children (p/pattern+ source)))
389 (make-list-pattern (groupify children)))))
391 (defun p/mixed (source)
392 (klacks:expecting-element (source "mixed")
393 (consume-and-skip-to-native source)
394 (let ((children (p/pattern+ source)))
395 (make-interleave (groupify children) (make-text)))))
397 (defun p/ref (source)
398 (klacks:expecting-element (source "ref")
399 (prog1
400 (let* ((name (ntc "name" source))
401 (pdefinition
402 (or (find-definition name)
403 (setf (find-definition name)
404 (make-definition :name name :child nil)))))
405 (make-ref pdefinition))
406 (skip-foreign* source))))
408 (defun p/parent-ref (source)
409 (klacks:expecting-element (source "parentRef")
410 (prog1
411 (let* ((name (ntc "name" source))
412 (grammar (grammar-parent *grammar*))
413 (pdefinition
414 (or (find-definition name grammar)
415 (setf (find-definition name grammar)
416 (make-definition :name name :child nil)))))
417 (make-ref pdefinition))
418 (skip-foreign* source))))
420 (defun p/empty (source)
421 (klacks:expecting-element (source "empty")
422 (skip-foreign* source)
423 (make-empty)))
425 (defun p/text (source)
426 (klacks:expecting-element (source "text")
427 (skip-foreign* source)
428 (make-text)))
430 (defun consume-and-parse-characters (source)
431 ;; fixme
432 (let ((tmp ""))
433 (loop
434 (multiple-value-bind (key data) (klacks:peek-next source)
435 (case key
436 (:characters
437 (setf tmp (concatenate 'string tmp data)))
438 (:end-element (return)))))
439 tmp))
441 (defun p/value (source)
442 (klacks:expecting-element (source "value")
443 (let* ((type (ntc "type" source))
444 (string (consume-and-parse-characters source))
445 (ns *namespace-uri*)
446 (dl *datatype-library*))
447 (unless type
448 (setf type "token")
449 (setf dl ""))
450 (make-value :string string :type type :ns ns :datatype-library dl))))
452 (defun p/data (source)
453 (klacks:expecting-element (source "data")
454 (let* ((type (ntc "type" source))
455 (result (make-data :type type
456 :datatype-library *datatype-library*
458 (params '()))
459 (loop
460 (multiple-value-bind (key uri lname)
461 (klacks:peek-next source)
463 (case key
464 (:start-element
465 (case (find-symbol lname :keyword)
466 (:|param| (push (p/param source) params))
467 (:|except|
468 (setf (pattern-except result) (p/except-pattern source))
469 (skip-to-native source)
470 (return))
471 (t (skip-foreign source))))
472 (:end-element
473 (return)))))
474 (setf (pattern-params result) (nreverse params))
475 result)))
477 (defun p/param (source)
478 (klacks:expecting-element (source "param")
479 (let ((name (ntc "name" source))
480 (string (consume-and-parse-characters source)))
481 (make-param :name name :string string))))
483 (defun p/except-pattern (source)
484 (klacks:expecting-element (source "except")
485 (with-library-and-ns (klacks:list-attributes source)
486 (klacks:consume source)
487 (choice-ify (p/pattern+ source)))))
489 (defun p/not-allowed (source)
490 (klacks:expecting-element (source "notAllowed")
491 (consume-and-skip-to-native source)
492 (make-not-allowed)))
494 (defun safe-parse-uri (source str &optional base)
495 (when (zerop (length str))
496 (rng-error source "missing URI"))
497 (handler-case
498 (if base
499 (puri:merge-uris str base)
500 (puri:parse-uri str))
501 (puri:uri-parse-error ()
502 (rng-error source "invalid URI: ~A" str))))
504 (defun p/external-ref (source)
505 (klacks:expecting-element (source "externalRef")
506 (let* ((href
507 (escape-uri (attribute "href" (klacks:list-attributes source))))
508 (base (klacks:current-xml-base source))
509 (uri (safe-parse-uri source href base)))
510 (when (find uri *include-uri-stack* :test #'puri:uri=)
511 (rng-error source "looping include"))
512 (prog1
513 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
514 (xstream
515 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
516 (klacks:with-open-source (source (cxml:make-source xstream))
517 (invoke-with-klacks-handler
518 (lambda ()
519 (klacks:find-event source :start-element)
520 (let ((*datatype-library* ""))
521 (p/pattern source)))
522 source)))
523 (skip-foreign* source)))))
525 (defun p/grammar (source &optional grammar)
526 (klacks:expecting-element (source "grammar")
527 (consume-and-skip-to-native source)
528 (let ((*grammar* (or grammar (make-grammar *grammar*))))
529 (process-grammar-content* source)
530 (unless (grammar-start *grammar*)
531 (rng-error source "no <start> in grammar"))
532 (check-pattern-definitions source *grammar*)
533 (defn-child (grammar-start *grammar*)))))
535 (defvar *include-start*)
536 (defvar *include-definitions*)
538 (defun process-grammar-content* (source &key disallow-include)
539 (loop
540 (multiple-value-bind (key uri lname) (klacks:peek source)
542 (case key
543 (:start-element
544 (with-library-and-ns (klacks:list-attributes source)
545 (case (find-symbol lname :keyword)
546 (:|start| (process-start source))
547 (:|define| (process-define source))
548 (:|div| (process-div source))
549 (:|include|
550 (when disallow-include
551 (rng-error source "nested include not permitted"))
552 (process-include source))
554 (skip-foreign source)))))
555 (:end-element
556 (return))))
557 (klacks:consume source)))
559 (defun process-start (source)
560 (klacks:expecting-element (source "start")
561 (let* ((combine0 (ntc "combine" source))
562 (combine
563 (when combine0
564 (find-symbol (string-upcase combine0) :keyword)))
565 (child
566 (progn
567 (consume-and-skip-to-native source)
568 (p/pattern source)))
569 (pdefinition (grammar-start *grammar*)))
570 (skip-foreign* source)
571 ;; fixme: shared code with process-define
572 (unless pdefinition
573 (setf pdefinition (make-definition :name :start :child nil))
574 (setf (grammar-start *grammar*) pdefinition))
575 (when *include-body-p*
576 (setf *include-start* pdefinition))
577 (cond
578 ((defn-child pdefinition)
579 (ecase (defn-redefinition pdefinition)
580 (:not-being-redefined
581 (when (and combine
582 (defn-combine-method pdefinition)
583 (not (eq combine
584 (defn-combine-method pdefinition))))
585 (rng-error source "conflicting combine values for <start>"))
586 (unless combine
587 (when (defn-head-p pdefinition)
588 (rng-error source "multiple definitions for <start>"))
589 (setf (defn-head-p pdefinition) t))
590 (unless (defn-combine-method pdefinition)
591 (setf (defn-combine-method pdefinition) combine))
592 (setf (defn-child pdefinition)
593 (case (defn-combine-method pdefinition)
594 (:choice
595 (make-choice (defn-child pdefinition) child))
596 (:interleave
597 (make-interleave (defn-child pdefinition) child)))))
598 (:being-redefined-and-no-original
599 (setf (defn-redefinition pdefinition)
600 :being-redefined-and-original))
601 (:being-redefined-and-original)))
603 (setf (defn-child pdefinition) child)
604 (setf (defn-combine-method pdefinition) combine)
605 (setf (defn-head-p pdefinition) (null combine))
606 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
608 (defun zip (constructor children)
609 (cond
610 ((null children)
611 (rng-error nil "empty choice?"))
612 ((null (cdr children))
613 (car children))
615 (destructuring-bind (a b &rest rest)
616 children
617 (zip constructor (cons (funcall constructor a b) rest))))))
619 (defun choice-ify (children) (zip #'make-choice children))
620 (defun groupify (children) (zip #'make-group children))
621 (defun interleave-ify (children) (zip #'make-interleave children))
623 (defun find-definition (name &optional (grammar *grammar*))
624 (gethash name (grammar-definitions grammar)))
626 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
627 (setf (gethash name (grammar-definitions grammar)) newval))
629 (defun process-define (source)
630 (klacks:expecting-element (source "define")
631 (let* ((name (ntc "name" source))
632 (combine0 (ntc "combine" source))
633 (combine (when combine0
634 (find-symbol (string-upcase combine0) :keyword)))
635 (child (groupify
636 (progn
637 (consume-and-skip-to-native source)
638 (p/pattern+ source))))
639 (pdefinition (find-definition name)))
640 (unless pdefinition
641 (setf pdefinition (make-definition :name name :child nil))
642 (setf (find-definition name) pdefinition))
643 (when *include-body-p*
644 (push pdefinition *include-definitions*))
645 (cond
646 ((defn-child pdefinition)
647 (case (defn-redefinition pdefinition)
648 (:not-being-redefined
649 (when (and combine
650 (defn-combine-method pdefinition)
651 (not (eq combine
652 (defn-combine-method pdefinition))))
653 (rng-error source "conflicting combine values for ~A" name))
654 (unless combine
655 (when (defn-head-p pdefinition)
656 (rng-error source "multiple definitions for ~A" name))
657 (setf (defn-head-p pdefinition) t))
658 (unless (defn-combine-method pdefinition)
659 (setf (defn-combine-method pdefinition) combine))
660 (setf (defn-child pdefinition)
661 (case (defn-combine-method pdefinition)
662 (:choice
663 (make-choice (defn-child pdefinition) child))
664 (:interleave
665 (make-interleave (defn-child pdefinition) child)))))
666 (:being-redefined-and-no-original
667 (setf (defn-redefinition pdefinition)
668 :being-redefined-and-original))
669 (:being-redefined-and-original)))
671 (setf (defn-child pdefinition) child)
672 (setf (defn-combine-method pdefinition) combine)
673 (setf (defn-head-p pdefinition) (null combine))
674 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
676 (defun process-div (source)
677 (klacks:expecting-element (source "div")
678 (consume-and-skip-to-native source)
679 (process-grammar-content* source)))
681 (defun reset-definition-for-include (defn)
682 (setf (defn-combine-method defn) nil)
683 (setf (defn-redefinition defn) :being-redefined-and-no-original)
684 (setf (defn-head-p defn) nil))
686 (defun restore-definition (defn original)
687 (setf (defn-combine-method defn) (defn-combine-method original))
688 (setf (defn-redefinition defn) (defn-redefinition original))
689 (setf (defn-head-p defn) (defn-head-p original)))
691 (defun process-include (source)
692 (klacks:expecting-element (source "include")
693 (let* ((href
694 (escape-uri (attribute "href" (klacks:list-attributes source))))
695 (base (klacks:current-xml-base source))
696 (uri (safe-parse-uri source href base))
697 (*include-start* nil)
698 (*include-definitions* '()))
699 (consume-and-skip-to-native source)
700 (let ((*include-body-p* t))
701 (process-grammar-content* source :disallow-include t))
702 (let ((tmp-start
703 (when *include-start*
704 (prog1
705 (copy-structure *include-start*)
706 (reset-definition-for-include *include-start*))))
707 (tmp-defns
708 (loop
709 for defn in *include-definitions*
710 collect
711 (prog1
712 (copy-structure defn)
713 (reset-definition-for-include defn)))))
714 (when (find uri *include-uri-stack* :test #'puri:uri=)
715 (rng-error source "looping include"))
716 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
717 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri)))
718 (klacks:with-open-source (source (cxml:make-source xstream))
719 (invoke-with-klacks-handler
720 (lambda ()
721 (klacks:find-event source :start-element)
722 (let ((*datatype-library* ""))
723 (p/grammar source *grammar*)))
724 source))
725 (check-pattern-definitions source *grammar*)
726 (when tmp-start
727 (restore-definition *include-start* tmp-start))
728 (dolist (copy tmp-defns)
729 (let ((defn (gethash (defn-name copy)
730 (grammar-definitions *grammar*))))
731 (restore-definition defn copy)))
732 (defn-child (grammar-start *grammar*)))))))
734 (defun check-pattern-definitions (source grammar)
735 (when (eq (defn-redefinition (grammar-start grammar))
736 :being-redefined-and-no-original)
737 (rng-error source "start not found in redefinition of grammar"))
738 (loop for defn being each hash-value in (grammar-definitions grammar) do
739 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
740 (rng-error source "redefinition not found in grammar"))
741 (unless (defn-child defn)
742 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
744 (defvar *any-name-allowed-p* t)
745 (defvar *ns-name-allowed-p* t)
747 (defun destructure-name (source qname)
748 (multiple-value-bind (uri lname)
749 (klacks:decode-qname qname source)
750 (setf uri (or uri *namespace-uri*))
751 (when (and *attribute-namespace-p*
752 (or (and (equal lname "xmlns") (equal uri ""))
753 (equal uri "http://www.w3.org/2000/xmlns")))
754 (rng-error source "namespace attribute not permitted"))
755 (make-name uri lname)))
757 (defun p/name-class (source)
758 (klacks:expecting-element (source)
759 (with-library-and-ns (klacks:list-attributes source)
760 (case (find-symbol (klacks:current-lname source) :keyword)
761 (:|name|
762 (let ((qname (string-trim *whitespace*
763 (consume-and-parse-characters source))))
764 (destructure-name source qname)))
765 (:|anyName|
766 (unless *any-name-allowed-p*
767 (rng-error source "anyname now permitted in except"))
768 (klacks:consume source)
769 (prog1
770 (let ((*any-name-allowed-p* nil))
771 (make-any-name (p/except-name-class? source)))
772 (skip-to-native source)))
773 (:|nsName|
774 (unless *ns-name-allowed-p*
775 (rng-error source "nsname now permitted in except"))
776 (let ((uri *namespace-uri*)
777 (*any-name-allowed-p* nil)
778 (*ns-name-allowed-p* nil))
779 (when (and *attribute-namespace-p*
780 (equal uri "http://www.w3.org/2000/xmlns"))
781 (rng-error source "namespace attribute not permitted"))
782 (klacks:consume source)
783 (prog1
784 (make-ns-name uri (p/except-name-class? source))
785 (skip-to-native source))))
786 (:|choice|
787 (klacks:consume source)
788 (simplify-nc-choice (p/name-class* source)))
790 (rng-error source "invalid child in except"))))))
792 (defun p/name-class* (source)
793 (let ((results nil))
794 (loop
795 (skip-to-native source)
796 (case (klacks:peek source)
797 (:start-element (push (p/name-class source) results))
798 (:end-element (return)))
799 (klacks:consume source))
800 (nreverse results)))
802 (defun p/except-name-class? (source)
803 (skip-to-native source)
804 (multiple-value-bind (key uri lname)
805 (klacks:peek source)
807 (if (and (eq key :start-element)
808 (string= (find-symbol lname :keyword) "except"))
809 (p/except-name-class source)
810 nil)))
812 (defun p/except-name-class (source)
813 (klacks:expecting-element (source "except")
814 (with-library-and-ns (klacks:list-attributes source)
815 (klacks:consume source)
816 (let ((x (p/name-class* source)))
817 (if (cdr x)
818 (simplify-nc-choice x)
819 (car x))))))
821 (defun escape-uri (string)
822 (with-output-to-string (out)
823 (loop for c across (cxml::rod-to-utf8-string string) do
824 (let ((code (char-code c)))
825 ;; http://www.w3.org/TR/xlink/#link-locators
826 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
827 (format out "%~2,'0X" code)
828 (write-char c out))))))
831 ;;;; unparsing
833 (defvar *definitions-to-names*)
834 (defvar *seen-names*)
836 (defun serialization-name (defn)
837 (or (gethash defn *definitions-to-names*)
838 (setf (gethash defn *definitions-to-names*)
839 (let ((name (if (gethash (defn-name defn) *seen-names*)
840 (format nil "~A-~D"
841 (defn-name defn)
842 (hash-table-count *seen-names*))
843 (defn-name defn))))
844 (setf (gethash name *seen-names*) defn)
845 name))))
847 (defun serialize-grammar (grammar sink)
848 (cxml:with-xml-output sink
849 (let ((*definitions-to-names* (make-hash-table))
850 (*seen-names* (make-hash-table :test 'equal)))
851 (cxml:with-element "grammar"
852 (cxml:with-element "start"
853 (serialize-pattern grammar))
854 (loop for defn being each hash-key in *definitions-to-names* do
855 (serialize-definition defn))))))
857 (defun serialize-pattern (pattern)
858 (etypecase pattern
859 (element
860 (cxml:with-element "element"
861 (serialize-name (pattern-name pattern))
862 (serialize-pattern (pattern-child pattern))))
863 (attribute
864 (cxml:with-element "attribute"
865 (serialize-name (pattern-name pattern))
866 (serialize-pattern (pattern-child pattern))))
867 (%combination
868 (cxml:with-element
869 (etypecase pattern
870 (group "group")
871 (interleave "interleave")
872 (choice "choice"))
873 (serialize-pattern (pattern-a pattern))
874 (serialize-pattern (pattern-b pattern))))
875 (one-or-more
876 (cxml:with-element "oneOrmore"
877 (serialize-pattern (pattern-child pattern))))
878 (list-pattern
879 (cxml:with-element "list"
880 (serialize-pattern (pattern-child pattern))))
881 (ref
882 (cxml:with-element "ref"
883 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
884 (empty
885 (cxml:with-element "empty"))
886 (not-allowed
887 (cxml:with-element "notAllowed"))
888 (text
889 (cxml:with-element "text"))
890 (value
891 (cxml:with-element "value"
892 (cxml:attribute "datatype-library"
893 (pattern-datatype-library pattern))
894 (cxml:attribute "type" (pattern-type pattern))
895 (cxml:attribute "ns" (pattern-ns pattern))
896 (cxml:text (pattern-string pattern))))
897 (data
898 (cxml:with-element "value"
899 (cxml:attribute "datatype-library"
900 (pattern-datatype-library pattern))
901 (cxml:attribute "type" (pattern-type pattern))
902 (dolist (param (pattern-params pattern))
903 (cxml:with-element "param"
904 (cxml:attribute "name" (param-name param))
905 (cxml:text (param-string param))))
906 (when (pattern-except pattern)
907 (cxml:with-element "except"
908 (serialize-pattern (pattern-except pattern))))))))
910 (defun serialize-definition (defn)
911 (cxml:with-element "define"
912 (cxml:attribute "name" (serialization-name defn))
913 (serialize-pattern (defn-child defn))))
915 (defun serialize-name (name)
916 (etypecase name
917 (name
918 (cxml:with-element "name"
919 (cxml:attribute "ns" (name-uri name))
920 (cxml:text (name-lname name))))
921 (any-name
922 (cxml:with-element "anyName"
923 (when (any-name-except name)
924 (serialize-except-name (any-name-except name)))))
925 (ns-name
926 (cxml:with-element "anyName"
927 (cxml:attribute "ns" (ns-name-uri name))
928 (when (ns-name-except name)
929 (serialize-except-name (ns-name-except name)))))
930 (name-class-choice
931 (cxml:with-element "choice"
932 (serialize-name (name-class-choice-a name))
933 (serialize-name (name-class-choice-b name))))))
935 (defun serialize-except-name (spec)
936 (cxml:with-element "except"
937 (serialize-name (cdr spec))))
940 ;;;; simplification
942 ;;; 4.1 Annotations
943 ;;; Foreign attributes and elements are removed implicitly while parsing.
945 ;;; 4.2 Whitespace
946 ;;; All character data is discarded while parsing (which can only be
947 ;;; whitespace after validation).
949 ;;; Whitespace in name, type, and combine attributes is stripped while
950 ;;; parsing. Ditto for <name/>.
952 ;;; 4.3. datatypeLibrary attribute
953 ;;; Escaping is done by p/pattern.
954 ;;; Attribute value defaulting is done using *datatype-library*; only
955 ;;; p/data and p/value record the computed value.
957 ;;; 4.4. type attribute of value element
958 ;;; Done by p/value.
960 ;;; 4.5. href attribute
961 ;;; Escaping is done by process-include and p/external-ref.
963 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
964 ;;; but that requires xstream hacking.
966 ;;; 4.6. externalRef element
967 ;;; Done by p/external-ref.
969 ;;; 4.7. include element
970 ;;; Done by process-include.
972 ;;; 4.8. name attribute of element and attribute elements
973 ;;; `name' is stored as a slot, not a child. Done by p/element and
974 ;;; p/attribute.
976 ;;; 4.9. ns attribute
977 ;;; done by p/name-class, p/value, p/element, p/attribute
979 ;;; 4.10. QNames
980 ;;; done by p/name-class
982 ;;; 4.11. div element
983 ;;; Legen wir gar nicht erst an.
985 ;;; 4.12. 4.13 4.14 4.15
986 ;;; beim anlegen
988 ;;; 4.16
989 ;;; p/name-class
990 ;;; -- ausser der sache mit den datentypen
992 ;;; 4.17, 4.18, 4.19
993 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
994 ;;; beschrieben.
996 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
997 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
998 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
999 ;;; dafuer beim Serialisieren um.
1001 (defmethod check-recursion ((pattern element) depth)
1002 (check-recursion (pattern-child pattern) (1+ depth)))
1004 (defmethod check-recursion ((pattern ref) depth)
1005 (when (eql (pattern-crdepth pattern) depth)
1006 (rng-error nil "infinite recursion in ~A"
1007 (defn-name (pattern-target pattern))))
1008 (when (null (pattern-crdepth pattern))
1009 (setf (pattern-crdepth pattern) depth)
1010 (check-recursion (defn-child (pattern-target pattern)) depth)
1011 (setf (pattern-crdepth pattern) t)))
1013 (defmethod check-recursion ((pattern %parent) depth)
1014 (check-recursion (pattern-child pattern) depth))
1016 (defmethod check-recursion ((pattern %combination) depth)
1017 (check-recursion (pattern-a pattern) depth)
1018 (check-recursion (pattern-b pattern) depth))
1020 (defmethod check-recursion ((pattern %leaf) depth)
1021 (declare (ignore depth)))
1023 (defmethod check-recursion ((pattern data) depth)
1024 (when (pattern-except pattern)
1025 (check-recursion (pattern-except pattern) depth)))
1028 ;;;; 4.20
1030 ;;; %PARENT
1032 (defmethod fold-not-allowed ((pattern element))
1033 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1034 pattern)
1036 (defmethod fold-not-allowed ((pattern %parent))
1037 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1038 (if (typep (pattern-child pattern) 'not-allowed)
1039 (pattern-child pattern)
1040 pattern))
1042 ;;; %COMBINATION
1044 (defmethod fold-not-allowed ((pattern %combination))
1045 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
1046 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
1047 pattern)
1049 (defmethod fold-not-allowed ((pattern group))
1050 (call-next-method)
1051 (cond
1052 ;; remove if any child is not allowed
1053 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1054 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1055 (t pattern)))
1057 (defmethod fold-not-allowed ((pattern interleave))
1058 (call-next-method)
1059 (cond
1060 ;; remove if any child is not allowed
1061 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1062 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1063 (t pattern)))
1065 (defmethod fold-not-allowed ((pattern choice))
1066 (call-next-method)
1067 (cond
1068 ;; if any child is not allowed, choose the other
1069 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1070 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1071 (t pattern)))
1073 ;;; LEAF
1075 (defmethod fold-not-allowed ((pattern %leaf))
1076 pattern)
1078 (defmethod fold-not-allowed ((pattern data))
1079 (when (pattern-except pattern)
1080 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1081 (when (typep (pattern-except pattern) 'not-allowed)
1082 (setf (pattern-except pattern) nil)))
1083 pattern)
1085 ;;; REF
1087 (defmethod fold-not-allowed ((pattern ref))
1088 pattern)
1091 ;;;; 4.21
1093 ;;; %PARENT
1095 (defmethod fold-empty ((pattern one-or-more))
1096 (call-next-method)
1097 (if (typep (pattern-child pattern) 'empty)
1098 (pattern-child pattern)
1099 pattern))
1101 (defmethod fold-empty ((pattern %parent))
1102 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1103 pattern)
1105 ;;; %COMBINATION
1107 (defmethod fold-empty ((pattern %combination))
1108 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1109 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1110 pattern)
1112 (defmethod fold-empty ((pattern group))
1113 (call-next-method)
1114 (cond
1115 ;; if any child is empty, choose the other
1116 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1117 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1118 (t pattern)))
1120 (defmethod fold-empty ((pattern interleave))
1121 (call-next-method)
1122 (cond
1123 ;; if any child is empty, choose the other
1124 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1125 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1126 (t pattern)))
1128 (defmethod fold-empty ((pattern choice))
1129 (call-next-method)
1130 (if (typep (pattern-b pattern) 'empty)
1131 (cond
1132 ((typep (pattern-a pattern) 'empty)
1133 (pattern-a pattern))
1135 (rotatef (pattern-a pattern) (pattern-b pattern))
1136 pattern))
1137 pattern))
1139 ;;; LEAF
1141 (defmethod fold-empty ((pattern %leaf))
1142 pattern)
1144 (defmethod fold-empty ((pattern data))
1145 (when (pattern-except pattern)
1146 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1147 pattern)
1149 ;;; REF
1151 (defmethod fold-empty ((pattern ref))
1152 pattern)