ref aufloesen
[cxml-rng.git] / parse.lisp
blob0b66c1099ad0a48f485821d78156b20627b60b9c
1 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 (in-package :cxml-rng)
29 #+sbcl
30 (declaim (optimize (debug 2)))
33 ;;;; Errors
35 (define-condition rng-error (simple-error) ())
37 (defun rng-error (source fmt &rest args)
38 (let ((s (make-string-output-stream)))
39 (apply #'format s fmt args)
40 (when source
41 (etypecase source
42 (klacks:source
43 (format s "~& [ Error at line ~D, column ~D in ~S ]"
44 (klacks:current-line-number source)
45 (klacks:current-column-number source)
46 (klacks:current-system-id source)))
47 (sax:sax-parser-mixin
48 (format s "~& [ Error at line ~D, column ~D in ~S ]"
49 (sax:line-number source)
50 (sax:column-number source)
51 (sax:system-id source))) ))
52 (error 'rng-error
53 :format-control "~A"
54 :format-arguments (list (get-output-stream-string s)))))
57 ;;;; Parser
59 (defvar *datatype-library*)
60 (defvar *namespace-uri*)
61 (defvar *entity-resolver*)
62 (defvar *external-href-stack*)
63 (defvar *include-uri-stack*)
64 (defvar *include-body-p* nil)
65 (defvar *grammar*)
67 (defvar *debug* nil)
69 (defstruct (parsed-grammar (:constructor make-parsed-grammar (pattern)))
70 (pattern (missing) :type pattern)
71 (interned-start nil :type (or null pattern))
72 (registratur nil :type (or null hash-table)))
74 (defun invoke-with-klacks-handler (fn source)
75 (if *debug*
76 (funcall fn)
77 (handler-case
78 (funcall fn)
79 (cxml:xml-parse-error (c)
80 (rng-error source "Cannot parse schema: ~A" c)))))
82 (defun parse-relax-ng (input &key entity-resolver)
83 (klacks:with-open-source (source (cxml:make-source input))
84 (invoke-with-klacks-handler
85 (lambda ()
86 (klacks:find-event source :start-element)
87 (let* ((*datatype-library* "")
88 (*namespace-uri* "")
89 (*entity-resolver* entity-resolver)
90 (*external-href-stack* '())
91 (*include-uri-stack* '())
92 (*grammar* (make-grammar nil))
93 (result (p/pattern source)))
94 (unless result
95 (rng-error nil "empty grammar"))
96 (setf (grammar-start *grammar*)
97 (make-definition :name :start :child result))
98 (check-pattern-definitions source *grammar*)
99 (check-recursion result 0)
100 (setf result (fold-not-allowed result))
101 (setf result (fold-empty result))
102 (make-parsed-grammar result)))
103 source)))
106 ;;;; pattern structures
108 (defstruct pattern)
110 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
111 child)
113 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
114 name)
115 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-")))
116 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-")))
118 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
119 a b)
120 (defstruct (group
121 (:include %combination)
122 (:constructor make-group (a b))))
123 (defstruct (interleave
124 (:include %combination)
125 (:constructor make-interleave (a b))))
126 (defstruct (choice
127 (:include %combination)
128 (:constructor make-choice (a b))))
129 (defstruct (after
130 (:include %combination)
131 (:constructor make-after (a b))))
133 (defstruct (one-or-more
134 (:include %parent)
135 (:constructor make-one-or-more (child))))
136 (defstruct (list-pattern
137 (:include %parent)
138 (:constructor make-list-pattern (child))))
140 (defstruct (ref
141 (:include pattern)
142 (:conc-name "PATTERN-")
143 (:constructor make-ref (target)))
144 crdepth
145 target)
147 (defstruct (%leaf (:include pattern)))
149 (defstruct (empty (:include %leaf) (:conc-name "PATTERN-")))
150 (defstruct (text (:include %leaf) (:conc-name "PATTERN-")))
152 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
153 datatype-library
154 type)
156 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
158 string)
160 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
161 params
162 except)
164 (defstruct (not-allowed (:include %leaf) (:conc-name "PATTERN-")))
167 ;;;; non-pattern
169 (defstruct (grammar (:constructor make-grammar (parent)))
170 (start nil)
171 parent
172 (definitions (make-hash-table :test 'equal)))
174 (defstruct param
175 name
176 string)
178 ;; Clark calls this structure "RefPattern"
179 (defstruct (definition (:conc-name "DEFN-"))
180 name
181 combine-method
182 head-p
183 redefinition
184 child)
187 ;;; name-class
189 (defun missing ()
190 (error "missing arg"))
192 (defstruct name-class)
194 (defstruct (any-name (:include name-class)
195 (:constructor make-any-name (except)))
196 (except (missing) :type (or null name-class)))
198 (defstruct (name (:include name-class)
199 (:constructor make-name (uri lname)))
200 (uri (missing) :type string)
201 (lname (missing) :type string))
203 (defstruct (ns-name (:include name-class)
204 (:constructor make-ns-name (uri except)))
205 (uri (missing) :type string)
206 (except (missing) :type (or null name-class)))
208 (defstruct (name-class-choice (:include name-class)
209 (:constructor make-name-class-choice (a b)))
210 (a (missing) :type name-class)
211 (b (missing) :type name-class))
213 (defun simplify-nc-choice (values)
214 (zip #'make-name-class-choice values))
217 ;;;; parser
219 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
221 (defun skip-foreign* (source)
222 (loop
223 (case (klacks:peek-next source)
224 (:start-element (skip-foreign source))
225 (:end-element (return)))))
227 (defun skip-to-native (source)
228 (loop
229 (case (klacks:peek source)
230 (:start-element
231 (when (equal (klacks:current-uri source) *rng-namespace*)
232 (return))
233 (klacks:serialize-element source nil))
234 (:end-element (return)))
235 (klacks:consume source)))
237 (defun consume-and-skip-to-native (source)
238 (klacks:consume source)
239 (skip-to-native source))
241 (defun skip-foreign (source)
242 (when (equal (klacks:current-uri source) *rng-namespace*)
243 (rng-error source
244 "invalid schema: ~A not allowed here"
245 (klacks:current-lname source)))
246 (klacks:serialize-element source nil))
248 (defun attribute (lname attrs)
249 (let ((a (sax:find-attribute-ns "" lname attrs)))
250 (if a
251 (sax:attribute-value a)
252 nil)))
254 (defparameter *whitespace*
255 (format nil "~C~C~C~C"
256 (code-char 9)
257 (code-char 32)
258 (code-char 13)
259 (code-char 10)))
261 (defun ntc (lname source-or-attrs)
262 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
263 (let* ((attrs
264 (if (listp source-or-attrs)
265 source-or-attrs
266 (klacks:list-attributes source-or-attrs)))
267 (a (sax:find-attribute-ns "" lname attrs)))
268 (if a
269 (string-trim *whitespace* (sax:attribute-value a))
270 nil)))
272 (defmacro with-library-and-ns (attrs &body body)
273 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
275 (defun invoke-with-library-and-ns (fn attrs)
276 (let* ((dl (attribute "datatypeLibrary" attrs))
277 (ns (attribute "ns" attrs))
278 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
279 (*namespace-uri* (or ns *namespace-uri*)))
280 (funcall fn)))
282 (defun p/pattern (source)
283 (let* ((lname (klacks:current-lname source))
284 (attrs (klacks:list-attributes source)))
285 (with-library-and-ns attrs
286 (case (find-symbol lname :keyword)
287 (:|element| (p/element source (ntc "name" attrs)))
288 (:|attribute| (p/attribute source (ntc "name" attrs)))
289 (:|group| (p/combination #'groupify source))
290 (:|interleave| (p/combination #'interleave-ify source))
291 (:|choice| (p/combination #'choice-ify source))
292 (:|optional| (p/optional source))
293 (:|zeroOrMore| (p/zero-or-more source))
294 (:|oneOrMore| (p/one-or-more source))
295 (:|list| (p/list source))
296 (:|mixed| (p/mixed source))
297 (:|ref| (p/ref source))
298 (:|parentRef| (p/parent-ref source))
299 (:|empty| (p/empty source))
300 (:|text| (p/text source))
301 (:|value| (p/value source))
302 (:|data| (p/data source))
303 (:|notAllowed| (p/not-allowed source))
304 (:|externalRef| (p/external-ref source))
305 (:|grammar| (p/grammar source))
306 (t (skip-foreign source))))))
308 (defun p/pattern+ (source)
309 (let ((children nil))
310 (loop
311 (case (klacks:peek source)
312 (:start-element
313 (let ((p (p/pattern source))) (when p (push p children))))
314 (:end-element
315 (return))
317 (klacks:consume source))))
318 (unless children
319 (rng-error source "empty element"))
320 (nreverse children)))
322 (defun p/pattern? (source)
323 (let ((result nil))
324 (loop
325 (skip-to-native source)
326 (case (klacks:peek source)
327 (:start-element
328 (when result
329 (rng-error source "at most one pattern expected here"))
330 (setf result (p/pattern source)))
331 (:end-element
332 (return))
334 (klacks:consume source))))
335 result))
337 (defun p/element (source name)
338 (klacks:expecting-element (source "element")
339 (let ((result (make-element)))
340 (consume-and-skip-to-native source)
341 (if name
342 (setf (pattern-name result) (destructure-name source name))
343 (setf (pattern-name result) (p/name-class source)))
344 (skip-to-native source)
345 (setf (pattern-child result) (groupify (p/pattern+ source)))
346 result)))
348 (defvar *attribute-namespace-p* nil)
350 (defun p/attribute (source name)
351 (klacks:expecting-element (source "attribute")
352 (let ((result (make-attribute)))
353 (consume-and-skip-to-native source)
354 (if name
355 (setf (pattern-name result)
356 (let ((*namespace-uri* ""))
357 (destructure-name source name)))
358 (setf (pattern-name result)
359 (let ((*attribute-namespace-p* t))
360 (p/name-class source))))
361 (skip-to-native source)
362 (setf (pattern-child result)
363 (or (p/pattern? source) (make-text)))
364 result)))
366 (defun p/combination (zipper source)
367 (klacks:expecting-element (source)
368 (consume-and-skip-to-native source)
369 (funcall zipper (p/pattern+ source))))
371 (defun p/one-or-more (source)
372 (klacks:expecting-element (source "oneOrMore")
373 (consume-and-skip-to-native source)
374 (let ((children (p/pattern+ source)))
375 (make-one-or-more (groupify children)))))
377 (defun p/zero-or-more (source)
378 (klacks:expecting-element (source "zeroOrMore")
379 (consume-and-skip-to-native source)
380 (let ((children (p/pattern+ source)))
381 (make-choice (make-one-or-more (groupify children))
382 (make-empty)))))
384 (defun p/optional (source)
385 (klacks:expecting-element (source "optional")
386 (consume-and-skip-to-native source)
387 (let ((children (p/pattern+ source)))
388 (make-choice (groupify children) (make-empty)))))
390 (defun p/list (source)
391 (klacks:expecting-element (source "list")
392 (consume-and-skip-to-native source)
393 (let ((children (p/pattern+ source)))
394 (make-list-pattern (groupify children)))))
396 (defun p/mixed (source)
397 (klacks:expecting-element (source "mixed")
398 (consume-and-skip-to-native source)
399 (let ((children (p/pattern+ source)))
400 (make-interleave (groupify children) (make-text)))))
402 (defun p/ref (source)
403 (klacks:expecting-element (source "ref")
404 (prog1
405 (let* ((name (ntc "name" source))
406 (pdefinition
407 (or (find-definition name)
408 (setf (find-definition name)
409 (make-definition :name name :child nil)))))
410 (make-ref pdefinition))
411 (skip-foreign* source))))
413 (defun p/parent-ref (source)
414 (klacks:expecting-element (source "parentRef")
415 (prog1
416 (let* ((name (ntc "name" source))
417 (grammar (grammar-parent *grammar*))
418 (pdefinition
419 (or (find-definition name grammar)
420 (setf (find-definition name grammar)
421 (make-definition :name name :child nil)))))
422 (make-ref pdefinition))
423 (skip-foreign* source))))
425 (defun p/empty (source)
426 (klacks:expecting-element (source "empty")
427 (skip-foreign* source)
428 (make-empty)))
430 (defun p/text (source)
431 (klacks:expecting-element (source "text")
432 (skip-foreign* source)
433 (make-text)))
435 (defun consume-and-parse-characters (source)
436 ;; fixme
437 (let ((tmp ""))
438 (loop
439 (multiple-value-bind (key data) (klacks:peek-next source)
440 (case key
441 (:characters
442 (setf tmp (concatenate 'string tmp data)))
443 (:end-element (return)))))
444 tmp))
446 (defun p/value (source)
447 (klacks:expecting-element (source "value")
448 (let* ((type (ntc "type" source))
449 (string (consume-and-parse-characters source))
450 (ns *namespace-uri*)
451 (dl *datatype-library*))
452 (unless type
453 (setf type "token")
454 (setf dl ""))
455 (make-value :string string :type type :ns ns :datatype-library dl))))
457 (defun p/data (source)
458 (klacks:expecting-element (source "data")
459 (let* ((type (ntc "type" source))
460 (result (make-data :type type
461 :datatype-library *datatype-library*
463 (params '()))
464 (loop
465 (multiple-value-bind (key uri lname)
466 (klacks:peek-next source)
468 (case key
469 (:start-element
470 (case (find-symbol lname :keyword)
471 (:|param| (push (p/param source) params))
472 (:|except|
473 (setf (pattern-except result) (p/except-pattern source))
474 (skip-to-native source)
475 (return))
476 (t (skip-foreign source))))
477 (:end-element
478 (return)))))
479 (setf (pattern-params result) (nreverse params))
480 result)))
482 (defun p/param (source)
483 (klacks:expecting-element (source "param")
484 (let ((name (ntc "name" source))
485 (string (consume-and-parse-characters source)))
486 (make-param :name name :string string))))
488 (defun p/except-pattern (source)
489 (klacks:expecting-element (source "except")
490 (with-library-and-ns (klacks:list-attributes source)
491 (klacks:consume source)
492 (choice-ify (p/pattern+ source)))))
494 (defun p/not-allowed (source)
495 (klacks:expecting-element (source "notAllowed")
496 (consume-and-skip-to-native source)
497 (make-not-allowed)))
499 (defun safe-parse-uri (source str &optional base)
500 (when (zerop (length str))
501 (rng-error source "missing URI"))
502 (handler-case
503 (if base
504 (puri:merge-uris str base)
505 (puri:parse-uri str))
506 (puri:uri-parse-error ()
507 (rng-error source "invalid URI: ~A" str))))
509 (defun p/external-ref (source)
510 (klacks:expecting-element (source "externalRef")
511 (let* ((href
512 (escape-uri (attribute "href" (klacks:list-attributes source))))
513 (base (klacks:current-xml-base source))
514 (uri (safe-parse-uri source href base)))
515 (when (find uri *include-uri-stack* :test #'puri:uri=)
516 (rng-error source "looping include"))
517 (prog1
518 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
519 (xstream
520 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
521 (klacks:with-open-source (source (cxml:make-source xstream))
522 (invoke-with-klacks-handler
523 (lambda ()
524 (klacks:find-event source :start-element)
525 (let ((*datatype-library* ""))
526 (p/pattern source)))
527 source)))
528 (skip-foreign* source)))))
530 (defun p/grammar (source &optional grammar)
531 (klacks:expecting-element (source "grammar")
532 (consume-and-skip-to-native source)
533 (let ((*grammar* (or grammar (make-grammar *grammar*))))
534 (process-grammar-content* source)
535 (unless (grammar-start *grammar*)
536 (rng-error source "no <start> in grammar"))
537 (check-pattern-definitions source *grammar*)
538 (defn-child (grammar-start *grammar*)))))
540 (defvar *include-start*)
541 (defvar *include-definitions*)
543 (defun process-grammar-content* (source &key disallow-include)
544 (loop
545 (multiple-value-bind (key uri lname) (klacks:peek source)
547 (case key
548 (:start-element
549 (with-library-and-ns (klacks:list-attributes source)
550 (case (find-symbol lname :keyword)
551 (:|start| (process-start source))
552 (:|define| (process-define source))
553 (:|div| (process-div source))
554 (:|include|
555 (when disallow-include
556 (rng-error source "nested include not permitted"))
557 (process-include source))
559 (skip-foreign source)))))
560 (:end-element
561 (return))))
562 (klacks:consume source)))
564 (defun process-start (source)
565 (klacks:expecting-element (source "start")
566 (let* ((combine0 (ntc "combine" source))
567 (combine
568 (when combine0
569 (find-symbol (string-upcase combine0) :keyword)))
570 (child
571 (progn
572 (consume-and-skip-to-native source)
573 (p/pattern source)))
574 (pdefinition (grammar-start *grammar*)))
575 (skip-foreign* source)
576 ;; fixme: shared code with process-define
577 (unless pdefinition
578 (setf pdefinition (make-definition :name :start :child nil))
579 (setf (grammar-start *grammar*) pdefinition))
580 (when *include-body-p*
581 (setf *include-start* pdefinition))
582 (cond
583 ((defn-child pdefinition)
584 (ecase (defn-redefinition pdefinition)
585 (:not-being-redefined
586 (when (and combine
587 (defn-combine-method pdefinition)
588 (not (eq combine
589 (defn-combine-method pdefinition))))
590 (rng-error source "conflicting combine values for <start>"))
591 (unless combine
592 (when (defn-head-p pdefinition)
593 (rng-error source "multiple definitions for <start>"))
594 (setf (defn-head-p pdefinition) t))
595 (unless (defn-combine-method pdefinition)
596 (setf (defn-combine-method pdefinition) combine))
597 (setf (defn-child pdefinition)
598 (case (defn-combine-method pdefinition)
599 (:choice
600 (make-choice (defn-child pdefinition) child))
601 (:interleave
602 (make-interleave (defn-child pdefinition) child)))))
603 (:being-redefined-and-no-original
604 (setf (defn-redefinition pdefinition)
605 :being-redefined-and-original))
606 (:being-redefined-and-original)))
608 (setf (defn-child pdefinition) child)
609 (setf (defn-combine-method pdefinition) combine)
610 (setf (defn-head-p pdefinition) (null combine))
611 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
613 (defun zip (constructor children)
614 (cond
615 ((null children)
616 (rng-error nil "empty choice?"))
617 ((null (cdr children))
618 (car children))
620 (destructuring-bind (a b &rest rest)
621 children
622 (zip constructor (cons (funcall constructor a b) rest))))))
624 (defun choice-ify (children) (zip #'make-choice children))
625 (defun groupify (children) (zip #'make-group children))
626 (defun interleave-ify (children) (zip #'make-interleave children))
628 (defun find-definition (name &optional (grammar *grammar*))
629 (gethash name (grammar-definitions grammar)))
631 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
632 (setf (gethash name (grammar-definitions grammar)) newval))
634 (defun process-define (source)
635 (klacks:expecting-element (source "define")
636 (let* ((name (ntc "name" source))
637 (combine0 (ntc "combine" source))
638 (combine (when combine0
639 (find-symbol (string-upcase combine0) :keyword)))
640 (child (groupify
641 (progn
642 (consume-and-skip-to-native source)
643 (p/pattern+ source))))
644 (pdefinition (find-definition name)))
645 (unless pdefinition
646 (setf pdefinition (make-definition :name name :child nil))
647 (setf (find-definition name) pdefinition))
648 (when *include-body-p*
649 (push pdefinition *include-definitions*))
650 (cond
651 ((defn-child pdefinition)
652 (case (defn-redefinition pdefinition)
653 (:not-being-redefined
654 (when (and combine
655 (defn-combine-method pdefinition)
656 (not (eq combine
657 (defn-combine-method pdefinition))))
658 (rng-error source "conflicting combine values for ~A" name))
659 (unless combine
660 (when (defn-head-p pdefinition)
661 (rng-error source "multiple definitions for ~A" name))
662 (setf (defn-head-p pdefinition) t))
663 (unless (defn-combine-method pdefinition)
664 (setf (defn-combine-method pdefinition) combine))
665 (setf (defn-child pdefinition)
666 (case (defn-combine-method pdefinition)
667 (:choice
668 (make-choice (defn-child pdefinition) child))
669 (:interleave
670 (make-interleave (defn-child pdefinition) child)))))
671 (:being-redefined-and-no-original
672 (setf (defn-redefinition pdefinition)
673 :being-redefined-and-original))
674 (:being-redefined-and-original)))
676 (setf (defn-child pdefinition) child)
677 (setf (defn-combine-method pdefinition) combine)
678 (setf (defn-head-p pdefinition) (null combine))
679 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
681 (defun process-div (source)
682 (klacks:expecting-element (source "div")
683 (consume-and-skip-to-native source)
684 (process-grammar-content* source)))
686 (defun reset-definition-for-include (defn)
687 (setf (defn-combine-method defn) nil)
688 (setf (defn-redefinition defn) :being-redefined-and-no-original)
689 (setf (defn-head-p defn) nil))
691 (defun restore-definition (defn original)
692 (setf (defn-combine-method defn) (defn-combine-method original))
693 (setf (defn-redefinition defn) (defn-redefinition original))
694 (setf (defn-head-p defn) (defn-head-p original)))
696 (defun process-include (source)
697 (klacks:expecting-element (source "include")
698 (let* ((href
699 (escape-uri (attribute "href" (klacks:list-attributes source))))
700 (base (klacks:current-xml-base source))
701 (uri (safe-parse-uri source href base))
702 (*include-start* nil)
703 (*include-definitions* '()))
704 (consume-and-skip-to-native source)
705 (let ((*include-body-p* t))
706 (process-grammar-content* source :disallow-include t))
707 (let ((tmp-start
708 (when *include-start*
709 (prog1
710 (copy-structure *include-start*)
711 (reset-definition-for-include *include-start*))))
712 (tmp-defns
713 (loop
714 for defn in *include-definitions*
715 collect
716 (prog1
717 (copy-structure defn)
718 (reset-definition-for-include defn)))))
719 (when (find uri *include-uri-stack* :test #'puri:uri=)
720 (rng-error source "looping include"))
721 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
722 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri)))
723 (klacks:with-open-source (source (cxml:make-source xstream))
724 (invoke-with-klacks-handler
725 (lambda ()
726 (klacks:find-event source :start-element)
727 (let ((*datatype-library* ""))
728 (p/grammar source *grammar*)))
729 source))
730 (check-pattern-definitions source *grammar*)
731 (when tmp-start
732 (restore-definition *include-start* tmp-start))
733 (dolist (copy tmp-defns)
734 (let ((defn (gethash (defn-name copy)
735 (grammar-definitions *grammar*))))
736 (restore-definition defn copy)))
737 (defn-child (grammar-start *grammar*)))))))
739 (defun check-pattern-definitions (source grammar)
740 (when (eq (defn-redefinition (grammar-start grammar))
741 :being-redefined-and-no-original)
742 (rng-error source "start not found in redefinition of grammar"))
743 (loop for defn being each hash-value in (grammar-definitions grammar) do
744 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
745 (rng-error source "redefinition not found in grammar"))
746 (unless (defn-child defn)
747 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
749 (defvar *any-name-allowed-p* t)
750 (defvar *ns-name-allowed-p* t)
752 (defun destructure-name (source qname)
753 (multiple-value-bind (uri lname)
754 (klacks:decode-qname qname source)
755 (setf uri (or uri *namespace-uri*))
756 (when (and *attribute-namespace-p*
757 (or (and (equal lname "xmlns") (equal uri ""))
758 (equal uri "http://www.w3.org/2000/xmlns")))
759 (rng-error source "namespace attribute not permitted"))
760 (make-name uri lname)))
762 (defun p/name-class (source)
763 (klacks:expecting-element (source)
764 (with-library-and-ns (klacks:list-attributes source)
765 (case (find-symbol (klacks:current-lname source) :keyword)
766 (:|name|
767 (let ((qname (string-trim *whitespace*
768 (consume-and-parse-characters source))))
769 (destructure-name source qname)))
770 (:|anyName|
771 (unless *any-name-allowed-p*
772 (rng-error source "anyname now permitted in except"))
773 (klacks:consume source)
774 (prog1
775 (let ((*any-name-allowed-p* nil))
776 (make-any-name (p/except-name-class? source)))
777 (skip-to-native source)))
778 (:|nsName|
779 (unless *ns-name-allowed-p*
780 (rng-error source "nsname now permitted in except"))
781 (let ((uri *namespace-uri*)
782 (*any-name-allowed-p* nil)
783 (*ns-name-allowed-p* nil))
784 (when (and *attribute-namespace-p*
785 (equal uri "http://www.w3.org/2000/xmlns"))
786 (rng-error source "namespace attribute not permitted"))
787 (klacks:consume source)
788 (prog1
789 (make-ns-name uri (p/except-name-class? source))
790 (skip-to-native source))))
791 (:|choice|
792 (klacks:consume source)
793 (simplify-nc-choice (p/name-class* source)))
795 (rng-error source "invalid child in except"))))))
797 (defun p/name-class* (source)
798 (let ((results nil))
799 (loop
800 (skip-to-native source)
801 (case (klacks:peek source)
802 (:start-element (push (p/name-class source) results))
803 (:end-element (return)))
804 (klacks:consume source))
805 (nreverse results)))
807 (defun p/except-name-class? (source)
808 (skip-to-native source)
809 (multiple-value-bind (key uri lname)
810 (klacks:peek source)
812 (if (and (eq key :start-element)
813 (string= (find-symbol lname :keyword) "except"))
814 (p/except-name-class source)
815 nil)))
817 (defun p/except-name-class (source)
818 (klacks:expecting-element (source "except")
819 (with-library-and-ns (klacks:list-attributes source)
820 (klacks:consume source)
821 (let ((x (p/name-class* source)))
822 (if (cdr x)
823 (simplify-nc-choice x)
824 (car x))))))
826 (defun escape-uri (string)
827 (with-output-to-string (out)
828 (loop for c across (cxml::rod-to-utf8-string string) do
829 (let ((code (char-code c)))
830 ;; http://www.w3.org/TR/xlink/#link-locators
831 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
832 (format out "%~2,'0X" code)
833 (write-char c out))))))
836 ;;;; unparsing
838 (defvar *definitions-to-names*)
839 (defvar *seen-names*)
841 (defun serialization-name (defn)
842 (or (gethash defn *definitions-to-names*)
843 (setf (gethash defn *definitions-to-names*)
844 (let ((name (if (gethash (defn-name defn) *seen-names*)
845 (format nil "~A-~D"
846 (defn-name defn)
847 (hash-table-count *seen-names*))
848 (defn-name defn))))
849 (setf (gethash name *seen-names*) defn)
850 name))))
852 (defun serialize-grammar (grammar sink)
853 (cxml:with-xml-output sink
854 (let ((*definitions-to-names* (make-hash-table))
855 (*seen-names* (make-hash-table :test 'equal)))
856 (cxml:with-element "grammar"
857 (cxml:with-element "start"
858 (serialize-pattern (parsed-grammar-pattern grammar)))
859 (loop for defn being each hash-key in *definitions-to-names* do
860 (serialize-definition defn))))))
862 (defun serialize-pattern (pattern)
863 (etypecase pattern
864 (element
865 (cxml:with-element "element"
866 (serialize-name (pattern-name pattern))
867 (serialize-pattern (pattern-child pattern))))
868 (attribute
869 (cxml:with-element "attribute"
870 (serialize-name (pattern-name pattern))
871 (serialize-pattern (pattern-child pattern))))
872 (%combination
873 (cxml:with-element
874 (etypecase pattern
875 (group "group")
876 (interleave "interleave")
877 (choice "choice"))
878 (serialize-pattern (pattern-a pattern))
879 (serialize-pattern (pattern-b pattern))))
880 (one-or-more
881 (cxml:with-element "oneOrmore"
882 (serialize-pattern (pattern-child pattern))))
883 (list-pattern
884 (cxml:with-element "list"
885 (serialize-pattern (pattern-child pattern))))
886 (ref
887 (cxml:with-element "ref"
888 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
889 (empty
890 (cxml:with-element "empty"))
891 (not-allowed
892 (cxml:with-element "notAllowed"))
893 (text
894 (cxml:with-element "text"))
895 (value
896 (cxml:with-element "value"
897 (cxml:attribute "datatype-library"
898 (pattern-datatype-library pattern))
899 (cxml:attribute "type" (pattern-type pattern))
900 (cxml:attribute "ns" (pattern-ns pattern))
901 (cxml:text (pattern-string pattern))))
902 (data
903 (cxml:with-element "value"
904 (cxml:attribute "datatype-library"
905 (pattern-datatype-library pattern))
906 (cxml:attribute "type" (pattern-type pattern))
907 (dolist (param (pattern-params pattern))
908 (cxml:with-element "param"
909 (cxml:attribute "name" (param-name param))
910 (cxml:text (param-string param))))
911 (when (pattern-except pattern)
912 (cxml:with-element "except"
913 (serialize-pattern (pattern-except pattern))))))))
915 (defun serialize-definition (defn)
916 (cxml:with-element "define"
917 (cxml:attribute "name" (serialization-name defn))
918 (serialize-pattern (defn-child defn))))
920 (defun serialize-name (name)
921 (etypecase name
922 (name
923 (cxml:with-element "name"
924 (cxml:attribute "ns" (name-uri name))
925 (cxml:text (name-lname name))))
926 (any-name
927 (cxml:with-element "anyName"
928 (when (any-name-except name)
929 (serialize-except-name (any-name-except name)))))
930 (ns-name
931 (cxml:with-element "anyName"
932 (cxml:attribute "ns" (ns-name-uri name))
933 (when (ns-name-except name)
934 (serialize-except-name (ns-name-except name)))))
935 (name-class-choice
936 (cxml:with-element "choice"
937 (serialize-name (name-class-choice-a name))
938 (serialize-name (name-class-choice-b name))))))
940 (defun serialize-except-name (spec)
941 (cxml:with-element "except"
942 (serialize-name (cdr spec))))
945 ;;;; simplification
947 ;;; 4.1 Annotations
948 ;;; Foreign attributes and elements are removed implicitly while parsing.
950 ;;; 4.2 Whitespace
951 ;;; All character data is discarded while parsing (which can only be
952 ;;; whitespace after validation).
954 ;;; Whitespace in name, type, and combine attributes is stripped while
955 ;;; parsing. Ditto for <name/>.
957 ;;; 4.3. datatypeLibrary attribute
958 ;;; Escaping is done by p/pattern.
959 ;;; Attribute value defaulting is done using *datatype-library*; only
960 ;;; p/data and p/value record the computed value.
962 ;;; 4.4. type attribute of value element
963 ;;; Done by p/value.
965 ;;; 4.5. href attribute
966 ;;; Escaping is done by process-include and p/external-ref.
968 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
969 ;;; but that requires xstream hacking.
971 ;;; 4.6. externalRef element
972 ;;; Done by p/external-ref.
974 ;;; 4.7. include element
975 ;;; Done by process-include.
977 ;;; 4.8. name attribute of element and attribute elements
978 ;;; `name' is stored as a slot, not a child. Done by p/element and
979 ;;; p/attribute.
981 ;;; 4.9. ns attribute
982 ;;; done by p/name-class, p/value, p/element, p/attribute
984 ;;; 4.10. QNames
985 ;;; done by p/name-class
987 ;;; 4.11. div element
988 ;;; Legen wir gar nicht erst an.
990 ;;; 4.12. 4.13 4.14 4.15
991 ;;; beim anlegen
993 ;;; 4.16
994 ;;; p/name-class
995 ;;; -- ausser der sache mit den datentypen
997 ;;; 4.17, 4.18, 4.19
998 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
999 ;;; beschrieben.
1001 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
1002 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
1003 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
1004 ;;; dafuer beim Serialisieren um.
1006 (defmethod check-recursion ((pattern element) depth)
1007 (check-recursion (pattern-child pattern) (1+ depth)))
1009 (defmethod check-recursion ((pattern ref) depth)
1010 (when (eql (pattern-crdepth pattern) depth)
1011 (rng-error nil "infinite recursion in ~A"
1012 (defn-name (pattern-target pattern))))
1013 (when (null (pattern-crdepth pattern))
1014 (setf (pattern-crdepth pattern) depth)
1015 (check-recursion (defn-child (pattern-target pattern)) depth)
1016 (setf (pattern-crdepth pattern) t)))
1018 (defmethod check-recursion ((pattern %parent) depth)
1019 (check-recursion (pattern-child pattern) depth))
1021 (defmethod check-recursion ((pattern %combination) depth)
1022 (check-recursion (pattern-a pattern) depth)
1023 (check-recursion (pattern-b pattern) depth))
1025 (defmethod check-recursion ((pattern %leaf) depth)
1026 (declare (ignore depth)))
1028 (defmethod check-recursion ((pattern data) depth)
1029 (when (pattern-except pattern)
1030 (check-recursion (pattern-except pattern) depth)))
1033 ;;;; 4.20
1035 ;;; %PARENT
1037 (defmethod fold-not-allowed ((pattern element))
1038 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1039 pattern)
1041 (defmethod fold-not-allowed ((pattern %parent))
1042 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1043 (if (typep (pattern-child pattern) 'not-allowed)
1044 (pattern-child pattern)
1045 pattern))
1047 ;;; %COMBINATION
1049 (defmethod fold-not-allowed ((pattern %combination))
1050 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
1051 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
1052 pattern)
1054 (defmethod fold-not-allowed ((pattern group))
1055 (call-next-method)
1056 (cond
1057 ;; remove if any child is not allowed
1058 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1059 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1060 (t pattern)))
1062 (defmethod fold-not-allowed ((pattern interleave))
1063 (call-next-method)
1064 (cond
1065 ;; remove if any child is not allowed
1066 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1067 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1068 (t pattern)))
1070 (defmethod fold-not-allowed ((pattern choice))
1071 (call-next-method)
1072 (cond
1073 ;; if any child is not allowed, choose the other
1074 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1075 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1076 (t pattern)))
1078 ;;; LEAF
1080 (defmethod fold-not-allowed ((pattern %leaf))
1081 pattern)
1083 (defmethod fold-not-allowed ((pattern data))
1084 (when (pattern-except pattern)
1085 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1086 (when (typep (pattern-except pattern) 'not-allowed)
1087 (setf (pattern-except pattern) nil)))
1088 pattern)
1090 ;;; REF
1092 (defmethod fold-not-allowed ((pattern ref))
1093 pattern)
1096 ;;;; 4.21
1098 ;;; %PARENT
1100 (defmethod fold-empty ((pattern one-or-more))
1101 (call-next-method)
1102 (if (typep (pattern-child pattern) 'empty)
1103 (pattern-child pattern)
1104 pattern))
1106 (defmethod fold-empty ((pattern %parent))
1107 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1108 pattern)
1110 ;;; %COMBINATION
1112 (defmethod fold-empty ((pattern %combination))
1113 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1114 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1115 pattern)
1117 (defmethod fold-empty ((pattern group))
1118 (call-next-method)
1119 (cond
1120 ;; if any child is empty, choose the other
1121 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1122 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1123 (t pattern)))
1125 (defmethod fold-empty ((pattern interleave))
1126 (call-next-method)
1127 (cond
1128 ;; if any child is empty, choose the other
1129 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1130 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1131 (t pattern)))
1133 (defmethod fold-empty ((pattern choice))
1134 (call-next-method)
1135 (if (typep (pattern-b pattern) 'empty)
1136 (cond
1137 ((typep (pattern-a pattern) 'empty)
1138 (pattern-a pattern))
1140 (rotatef (pattern-a pattern) (pattern-b pattern))
1141 pattern))
1142 pattern))
1144 ;;; LEAF
1146 (defmethod fold-empty ((pattern %leaf))
1147 pattern)
1149 (defmethod fold-empty ((pattern data))
1150 (when (pattern-except pattern)
1151 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1152 pattern)
1154 ;;; REF
1156 (defmethod fold-empty ((pattern ref))
1157 pattern)