ns-defaulting in attribute
[cxml-rng.git] / parse.lisp
blob1c48e4ee5228c1f81d33508bb7928fc02d7405f1
1 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 (in-package :cxml-rng)
29 #+sbcl
30 (declaim (optimize (debug 2)))
33 ;;;; Errors
35 (define-condition rng-error (simple-error) ())
37 (defun rng-error (source fmt &rest args)
38 (let ((s (make-string-output-stream)))
39 (apply #'format s fmt args)
40 (when source
41 (etypecase source
42 (klacks:source
43 (format s "~& [ Error at line ~D, column ~D in ~S ]"
44 (klacks:current-line-number source)
45 (klacks:current-column-number source)
46 (klacks:current-system-id source)))
47 (sax:sax-parser-mixin
48 (format s "~& [ Error at line ~D, column ~D in ~S ]"
49 (sax:line-number source)
50 (sax:column-number source)
51 (sax:system-id source))) ))
52 (error 'rng-error
53 :format-control "~A"
54 :format-arguments (list (get-output-stream-string s)))))
57 ;;;; Parser
59 (defvar *datatype-library*)
60 (defvar *namespace-uri*)
61 (defvar *ns*)
62 (defvar *entity-resolver*)
63 (defvar *external-href-stack*)
64 (defvar *include-uri-stack*)
65 (defvar *include-body-p* nil)
66 (defvar *grammar*)
68 (defvar *debug* nil)
70 (defstruct (parsed-grammar (:constructor make-parsed-grammar (pattern)))
71 (pattern (missing) :type pattern)
72 (interned-start nil :type (or null pattern))
73 (registratur nil :type (or null hash-table)))
75 (defun invoke-with-klacks-handler (fn source)
76 (if *debug*
77 (funcall fn)
78 (handler-case
79 (funcall fn)
80 (cxml:xml-parse-error (c)
81 (rng-error source "Cannot parse schema: ~A" c)))))
83 (defun parse-relax-ng (input &key entity-resolver)
84 (klacks:with-open-source (source (cxml:make-source input))
85 (invoke-with-klacks-handler
86 (lambda ()
87 (klacks:find-event source :start-element)
88 (let* ((*datatype-library* "")
89 (*namespace-uri* "")
90 (*entity-resolver* entity-resolver)
91 (*external-href-stack* '())
92 (*include-uri-stack* '())
93 (*grammar* (make-grammar nil))
94 (result (p/pattern source)))
95 (unless result
96 (rng-error nil "empty grammar"))
97 (setf (grammar-start *grammar*)
98 (make-definition :name :start :child result))
99 (check-pattern-definitions source *grammar*)
100 (check-recursion result 0)
101 (setf result (fold-not-allowed result))
102 (setf result (fold-empty result))
103 (make-parsed-grammar result)))
104 source)))
107 ;;;; pattern structures
109 (defstruct pattern)
111 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
112 child)
114 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
115 name)
116 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-")))
117 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-")))
119 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
120 a b)
121 (defstruct (group
122 (:include %combination)
123 (:constructor make-group (a b))))
124 (defstruct (interleave
125 (:include %combination)
126 (:constructor make-interleave (a b))))
127 (defstruct (choice
128 (:include %combination)
129 (:constructor make-choice (a b))))
130 (defstruct (after
131 (:include %combination)
132 (:constructor make-after (a b))))
134 (defstruct (one-or-more
135 (:include %parent)
136 (:constructor make-one-or-more (child))))
137 (defstruct (list-pattern
138 (:include %parent)
139 (:constructor make-list-pattern (child))))
141 (defstruct (ref
142 (:include pattern)
143 (:conc-name "PATTERN-")
144 (:constructor make-ref (target)))
145 crdepth
146 target)
148 (defstruct (%leaf (:include pattern)))
150 (defstruct (empty (:include %leaf) (:conc-name "PATTERN-")))
151 (defstruct (text (:include %leaf) (:conc-name "PATTERN-")))
153 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
154 datatype-library
155 type)
157 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
159 string)
161 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
162 params
163 except)
165 (defstruct (not-allowed (:include %leaf) (:conc-name "PATTERN-")))
168 ;;;; non-pattern
170 (defstruct (grammar (:constructor make-grammar (parent)))
171 (start nil)
172 parent
173 (definitions (make-hash-table :test 'equal)))
175 (defstruct param
176 name
177 string)
179 ;; Clark calls this structure "RefPattern"
180 (defstruct (definition (:conc-name "DEFN-"))
181 name
182 combine-method
183 head-p
184 redefinition
185 child)
188 ;;; name-class
190 (defun missing ()
191 (error "missing arg"))
193 (defstruct name-class)
195 (defstruct (any-name (:include name-class)
196 (:constructor make-any-name (except)))
197 (except (missing) :type (or null name-class)))
199 (defstruct (name (:include name-class)
200 (:constructor make-name (uri lname)))
201 (uri (missing) :type string)
202 (lname (missing) :type string))
204 (defstruct (ns-name (:include name-class)
205 (:constructor make-ns-name (uri except)))
206 (uri (missing) :type string)
207 (except (missing) :type (or null name-class)))
209 (defstruct (name-class-choice (:include name-class)
210 (:constructor make-name-class-choice (a b)))
211 (a (missing) :type name-class)
212 (b (missing) :type name-class))
214 (defun simplify-nc-choice (values)
215 (zip #'make-name-class-choice values))
218 ;;;; parser
220 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
222 (defun skip-foreign* (source)
223 (loop
224 (case (klacks:peek-next source)
225 (:start-element (skip-foreign source))
226 (:end-element (return)))))
228 (defun skip-to-native (source)
229 (loop
230 (case (klacks:peek source)
231 (:start-element
232 (when (equal (klacks:current-uri source) *rng-namespace*)
233 (return))
234 (klacks:serialize-element source nil))
235 (:end-element (return)))
236 (klacks:consume source)))
238 (defun consume-and-skip-to-native (source)
239 (klacks:consume source)
240 (skip-to-native source))
242 (defun skip-foreign (source)
243 (when (equal (klacks:current-uri source) *rng-namespace*)
244 (rng-error source
245 "invalid schema: ~A not allowed here"
246 (klacks:current-lname source)))
247 (klacks:serialize-element source nil))
249 (defun attribute (lname attrs)
250 (let ((a (sax:find-attribute-ns "" lname attrs)))
251 (if a
252 (sax:attribute-value a)
253 nil)))
255 (defparameter *whitespace*
256 (format nil "~C~C~C~C"
257 (code-char 9)
258 (code-char 32)
259 (code-char 13)
260 (code-char 10)))
262 (defun ntc (lname source-or-attrs)
263 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
264 (let* ((attrs
265 (if (listp source-or-attrs)
266 source-or-attrs
267 (klacks:list-attributes source-or-attrs)))
268 (a (sax:find-attribute-ns "" lname attrs)))
269 (if a
270 (string-trim *whitespace* (sax:attribute-value a))
271 nil)))
273 (defmacro with-library-and-ns (attrs &body body)
274 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
276 (defun invoke-with-library-and-ns (fn attrs)
277 (let* ((dl (attribute "datatypeLibrary" attrs))
278 (ns (attribute "ns" attrs))
279 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
280 (*namespace-uri* (or ns *namespace-uri*))
281 (*ns* ns))
282 (funcall fn)))
284 (defun p/pattern (source)
285 (let* ((lname (klacks:current-lname source))
286 (attrs (klacks:list-attributes source)))
287 (with-library-and-ns attrs
288 (case (find-symbol lname :keyword)
289 (:|element| (p/element source (ntc "name" attrs)))
290 (:|attribute| (p/attribute source (ntc "name" attrs)))
291 (:|group| (p/combination #'groupify source))
292 (:|interleave| (p/combination #'interleave-ify source))
293 (:|choice| (p/combination #'choice-ify source))
294 (:|optional| (p/optional source))
295 (:|zeroOrMore| (p/zero-or-more source))
296 (:|oneOrMore| (p/one-or-more source))
297 (:|list| (p/list source))
298 (:|mixed| (p/mixed source))
299 (:|ref| (p/ref source))
300 (:|parentRef| (p/parent-ref source))
301 (:|empty| (p/empty source))
302 (:|text| (p/text source))
303 (:|value| (p/value source))
304 (:|data| (p/data source))
305 (:|notAllowed| (p/not-allowed source))
306 (:|externalRef| (p/external-ref source))
307 (:|grammar| (p/grammar source))
308 (t (skip-foreign source))))))
310 (defun p/pattern+ (source)
311 (let ((children nil))
312 (loop
313 (case (klacks:peek source)
314 (:start-element
315 (let ((p (p/pattern source))) (when p (push p children))))
316 (:end-element
317 (return))
319 (klacks:consume source))))
320 (unless children
321 (rng-error source "empty element"))
322 (nreverse children)))
324 (defun p/pattern? (source)
325 (let ((result nil))
326 (loop
327 (skip-to-native source)
328 (case (klacks:peek source)
329 (:start-element
330 (when result
331 (rng-error source "at most one pattern expected here"))
332 (setf result (p/pattern source)))
333 (:end-element
334 (return))
336 (klacks:consume source))))
337 result))
339 (defun p/element (source name)
340 (klacks:expecting-element (source "element")
341 (let ((result (make-element)))
342 (consume-and-skip-to-native source)
343 (if name
344 (setf (pattern-name result) (destructure-name source name))
345 (setf (pattern-name result) (p/name-class source)))
346 (skip-to-native source)
347 (setf (pattern-child result) (groupify (p/pattern+ source)))
348 result)))
350 (defvar *attribute-namespace-p* nil)
352 (defun p/attribute (source name)
353 (klacks:expecting-element (source "attribute")
354 (let ((result (make-attribute)))
355 (consume-and-skip-to-native source)
356 (if name
357 (setf (pattern-name result)
358 (let ((*namespace-uri* (or *ns* "")))
359 (destructure-name source name)))
360 (setf (pattern-name result)
361 (let ((*attribute-namespace-p* t))
362 (p/name-class source))))
363 (skip-to-native source)
364 (setf (pattern-child result)
365 (or (p/pattern? source) (make-text)))
366 result)))
368 (defun p/combination (zipper source)
369 (klacks:expecting-element (source)
370 (consume-and-skip-to-native source)
371 (funcall zipper (p/pattern+ source))))
373 (defun p/one-or-more (source)
374 (klacks:expecting-element (source "oneOrMore")
375 (consume-and-skip-to-native source)
376 (let ((children (p/pattern+ source)))
377 (make-one-or-more (groupify children)))))
379 (defun p/zero-or-more (source)
380 (klacks:expecting-element (source "zeroOrMore")
381 (consume-and-skip-to-native source)
382 (let ((children (p/pattern+ source)))
383 (make-choice (make-one-or-more (groupify children))
384 (make-empty)))))
386 (defun p/optional (source)
387 (klacks:expecting-element (source "optional")
388 (consume-and-skip-to-native source)
389 (let ((children (p/pattern+ source)))
390 (make-choice (groupify children) (make-empty)))))
392 (defun p/list (source)
393 (klacks:expecting-element (source "list")
394 (consume-and-skip-to-native source)
395 (let ((children (p/pattern+ source)))
396 (make-list-pattern (groupify children)))))
398 (defun p/mixed (source)
399 (klacks:expecting-element (source "mixed")
400 (consume-and-skip-to-native source)
401 (let ((children (p/pattern+ source)))
402 (make-interleave (groupify children) (make-text)))))
404 (defun p/ref (source)
405 (klacks:expecting-element (source "ref")
406 (prog1
407 (let* ((name (ntc "name" source))
408 (pdefinition
409 (or (find-definition name)
410 (setf (find-definition name)
411 (make-definition :name name :child nil)))))
412 (make-ref pdefinition))
413 (skip-foreign* source))))
415 (defun p/parent-ref (source)
416 (klacks:expecting-element (source "parentRef")
417 (prog1
418 (let* ((name (ntc "name" source))
419 (grammar (grammar-parent *grammar*))
420 (pdefinition
421 (or (find-definition name grammar)
422 (setf (find-definition name grammar)
423 (make-definition :name name :child nil)))))
424 (make-ref pdefinition))
425 (skip-foreign* source))))
427 (defun p/empty (source)
428 (klacks:expecting-element (source "empty")
429 (skip-foreign* source)
430 (make-empty)))
432 (defun p/text (source)
433 (klacks:expecting-element (source "text")
434 (skip-foreign* source)
435 (make-text)))
437 (defun consume-and-parse-characters (source)
438 ;; fixme
439 (let ((tmp ""))
440 (loop
441 (multiple-value-bind (key data) (klacks:peek-next source)
442 (case key
443 (:characters
444 (setf tmp (concatenate 'string tmp data)))
445 (:end-element (return)))))
446 tmp))
448 (defun p/value (source)
449 (klacks:expecting-element (source "value")
450 (let* ((type (ntc "type" source))
451 (string (consume-and-parse-characters source))
452 (ns *namespace-uri*)
453 (dl *datatype-library*))
454 (unless type
455 (setf type "token")
456 (setf dl ""))
457 (make-value :string string :type type :ns ns :datatype-library dl))))
459 (defun p/data (source)
460 (klacks:expecting-element (source "data")
461 (let* ((type (ntc "type" source))
462 (result (make-data :type type
463 :datatype-library *datatype-library*
465 (params '()))
466 (loop
467 (multiple-value-bind (key uri lname)
468 (klacks:peek-next source)
470 (case key
471 (:start-element
472 (case (find-symbol lname :keyword)
473 (:|param| (push (p/param source) params))
474 (:|except|
475 (setf (pattern-except result) (p/except-pattern source))
476 (skip-to-native source)
477 (return))
478 (t (skip-foreign source))))
479 (:end-element
480 (return)))))
481 (setf (pattern-params result) (nreverse params))
482 result)))
484 (defun p/param (source)
485 (klacks:expecting-element (source "param")
486 (let ((name (ntc "name" source))
487 (string (consume-and-parse-characters source)))
488 (make-param :name name :string string))))
490 (defun p/except-pattern (source)
491 (klacks:expecting-element (source "except")
492 (with-library-and-ns (klacks:list-attributes source)
493 (klacks:consume source)
494 (choice-ify (p/pattern+ source)))))
496 (defun p/not-allowed (source)
497 (klacks:expecting-element (source "notAllowed")
498 (consume-and-skip-to-native source)
499 (make-not-allowed)))
501 (defun safe-parse-uri (source str &optional base)
502 (when (zerop (length str))
503 (rng-error source "missing URI"))
504 (handler-case
505 (if base
506 (puri:merge-uris str base)
507 (puri:parse-uri str))
508 (puri:uri-parse-error ()
509 (rng-error source "invalid URI: ~A" str))))
511 (defun p/external-ref (source)
512 (klacks:expecting-element (source "externalRef")
513 (let* ((href
514 (escape-uri (attribute "href" (klacks:list-attributes source))))
515 (base (klacks:current-xml-base source))
516 (uri (safe-parse-uri source href base)))
517 (when (find uri *include-uri-stack* :test #'puri:uri=)
518 (rng-error source "looping include"))
519 (prog1
520 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
521 (xstream
522 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
523 (klacks:with-open-source (source (cxml:make-source xstream))
524 (invoke-with-klacks-handler
525 (lambda ()
526 (klacks:find-event source :start-element)
527 (let ((*datatype-library* ""))
528 (p/pattern source)))
529 source)))
530 (skip-foreign* source)))))
532 (defun p/grammar (source &optional grammar)
533 (klacks:expecting-element (source "grammar")
534 (consume-and-skip-to-native source)
535 (let ((*grammar* (or grammar (make-grammar *grammar*))))
536 (process-grammar-content* source)
537 (unless (grammar-start *grammar*)
538 (rng-error source "no <start> in grammar"))
539 (check-pattern-definitions source *grammar*)
540 (defn-child (grammar-start *grammar*)))))
542 (defvar *include-start*)
543 (defvar *include-definitions*)
545 (defun process-grammar-content* (source &key disallow-include)
546 (loop
547 (multiple-value-bind (key uri lname) (klacks:peek source)
549 (case key
550 (:start-element
551 (with-library-and-ns (klacks:list-attributes source)
552 (case (find-symbol lname :keyword)
553 (:|start| (process-start source))
554 (:|define| (process-define source))
555 (:|div| (process-div source))
556 (:|include|
557 (when disallow-include
558 (rng-error source "nested include not permitted"))
559 (process-include source))
561 (skip-foreign source)))))
562 (:end-element
563 (return))))
564 (klacks:consume source)))
566 (defun process-start (source)
567 (klacks:expecting-element (source "start")
568 (let* ((combine0 (ntc "combine" source))
569 (combine
570 (when combine0
571 (find-symbol (string-upcase combine0) :keyword)))
572 (child
573 (progn
574 (consume-and-skip-to-native source)
575 (p/pattern source)))
576 (pdefinition (grammar-start *grammar*)))
577 (skip-foreign* source)
578 ;; fixme: shared code with process-define
579 (unless pdefinition
580 (setf pdefinition (make-definition :name :start :child nil))
581 (setf (grammar-start *grammar*) pdefinition))
582 (when *include-body-p*
583 (setf *include-start* pdefinition))
584 (cond
585 ((defn-child pdefinition)
586 (ecase (defn-redefinition pdefinition)
587 (:not-being-redefined
588 (when (and combine
589 (defn-combine-method pdefinition)
590 (not (eq combine
591 (defn-combine-method pdefinition))))
592 (rng-error source "conflicting combine values for <start>"))
593 (unless combine
594 (when (defn-head-p pdefinition)
595 (rng-error source "multiple definitions for <start>"))
596 (setf (defn-head-p pdefinition) t))
597 (unless (defn-combine-method pdefinition)
598 (setf (defn-combine-method pdefinition) combine))
599 (setf (defn-child pdefinition)
600 (case (defn-combine-method pdefinition)
601 (:choice
602 (make-choice (defn-child pdefinition) child))
603 (:interleave
604 (make-interleave (defn-child pdefinition) child)))))
605 (:being-redefined-and-no-original
606 (setf (defn-redefinition pdefinition)
607 :being-redefined-and-original))
608 (:being-redefined-and-original)))
610 (setf (defn-child pdefinition) child)
611 (setf (defn-combine-method pdefinition) combine)
612 (setf (defn-head-p pdefinition) (null combine))
613 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
615 (defun zip (constructor children)
616 (cond
617 ((null children)
618 (rng-error nil "empty choice?"))
619 ((null (cdr children))
620 (car children))
622 (destructuring-bind (a b &rest rest)
623 children
624 (zip constructor (cons (funcall constructor a b) rest))))))
626 (defun choice-ify (children) (zip #'make-choice children))
627 (defun groupify (children) (zip #'make-group children))
628 (defun interleave-ify (children) (zip #'make-interleave children))
630 (defun find-definition (name &optional (grammar *grammar*))
631 (gethash name (grammar-definitions grammar)))
633 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
634 (setf (gethash name (grammar-definitions grammar)) newval))
636 (defun process-define (source)
637 (klacks:expecting-element (source "define")
638 (let* ((name (ntc "name" source))
639 (combine0 (ntc "combine" source))
640 (combine (when combine0
641 (find-symbol (string-upcase combine0) :keyword)))
642 (child (groupify
643 (progn
644 (consume-and-skip-to-native source)
645 (p/pattern+ source))))
646 (pdefinition (find-definition name)))
647 (unless pdefinition
648 (setf pdefinition (make-definition :name name :child nil))
649 (setf (find-definition name) pdefinition))
650 (when *include-body-p*
651 (push pdefinition *include-definitions*))
652 (cond
653 ((defn-child pdefinition)
654 (case (defn-redefinition pdefinition)
655 (:not-being-redefined
656 (when (and combine
657 (defn-combine-method pdefinition)
658 (not (eq combine
659 (defn-combine-method pdefinition))))
660 (rng-error source "conflicting combine values for ~A" name))
661 (unless combine
662 (when (defn-head-p pdefinition)
663 (rng-error source "multiple definitions for ~A" name))
664 (setf (defn-head-p pdefinition) t))
665 (unless (defn-combine-method pdefinition)
666 (setf (defn-combine-method pdefinition) combine))
667 (setf (defn-child pdefinition)
668 (case (defn-combine-method pdefinition)
669 (:choice
670 (make-choice (defn-child pdefinition) child))
671 (:interleave
672 (make-interleave (defn-child pdefinition) child)))))
673 (:being-redefined-and-no-original
674 (setf (defn-redefinition pdefinition)
675 :being-redefined-and-original))
676 (:being-redefined-and-original)))
678 (setf (defn-child pdefinition) child)
679 (setf (defn-combine-method pdefinition) combine)
680 (setf (defn-head-p pdefinition) (null combine))
681 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
683 (defun process-div (source)
684 (klacks:expecting-element (source "div")
685 (consume-and-skip-to-native source)
686 (process-grammar-content* source)))
688 (defun reset-definition-for-include (defn)
689 (setf (defn-combine-method defn) nil)
690 (setf (defn-redefinition defn) :being-redefined-and-no-original)
691 (setf (defn-head-p defn) nil))
693 (defun restore-definition (defn original)
694 (setf (defn-combine-method defn) (defn-combine-method original))
695 (setf (defn-redefinition defn) (defn-redefinition original))
696 (setf (defn-head-p defn) (defn-head-p original)))
698 (defun process-include (source)
699 (klacks:expecting-element (source "include")
700 (let* ((href
701 (escape-uri (attribute "href" (klacks:list-attributes source))))
702 (base (klacks:current-xml-base source))
703 (uri (safe-parse-uri source href base))
704 (*include-start* nil)
705 (*include-definitions* '()))
706 (consume-and-skip-to-native source)
707 (let ((*include-body-p* t))
708 (process-grammar-content* source :disallow-include t))
709 (let ((tmp-start
710 (when *include-start*
711 (prog1
712 (copy-structure *include-start*)
713 (reset-definition-for-include *include-start*))))
714 (tmp-defns
715 (loop
716 for defn in *include-definitions*
717 collect
718 (prog1
719 (copy-structure defn)
720 (reset-definition-for-include defn)))))
721 (when (find uri *include-uri-stack* :test #'puri:uri=)
722 (rng-error source "looping include"))
723 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
724 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri)))
725 (klacks:with-open-source (source (cxml:make-source xstream))
726 (invoke-with-klacks-handler
727 (lambda ()
728 (klacks:find-event source :start-element)
729 (let ((*datatype-library* ""))
730 (p/grammar source *grammar*)))
731 source))
732 (check-pattern-definitions source *grammar*)
733 (when tmp-start
734 (restore-definition *include-start* tmp-start))
735 (dolist (copy tmp-defns)
736 (let ((defn (gethash (defn-name copy)
737 (grammar-definitions *grammar*))))
738 (restore-definition defn copy)))
739 (defn-child (grammar-start *grammar*)))))))
741 (defun check-pattern-definitions (source grammar)
742 (when (eq (defn-redefinition (grammar-start grammar))
743 :being-redefined-and-no-original)
744 (rng-error source "start not found in redefinition of grammar"))
745 (loop for defn being each hash-value in (grammar-definitions grammar) do
746 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
747 (rng-error source "redefinition not found in grammar"))
748 (unless (defn-child defn)
749 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
751 (defvar *any-name-allowed-p* t)
752 (defvar *ns-name-allowed-p* t)
754 (defun destructure-name (source qname)
755 (multiple-value-bind (uri lname)
756 (klacks:decode-qname qname source)
757 (setf uri (or uri *namespace-uri*))
758 (when (and *attribute-namespace-p*
759 (or (and (equal lname "xmlns") (equal uri ""))
760 (equal uri "http://www.w3.org/2000/xmlns")))
761 (rng-error source "namespace attribute not permitted"))
762 (make-name uri lname)))
764 (defun p/name-class (source)
765 (klacks:expecting-element (source)
766 (with-library-and-ns (klacks:list-attributes source)
767 (case (find-symbol (klacks:current-lname source) :keyword)
768 (:|name|
769 (let ((qname (string-trim *whitespace*
770 (consume-and-parse-characters source))))
771 (destructure-name source qname)))
772 (:|anyName|
773 (unless *any-name-allowed-p*
774 (rng-error source "anyname now permitted in except"))
775 (klacks:consume source)
776 (prog1
777 (let ((*any-name-allowed-p* nil))
778 (make-any-name (p/except-name-class? source)))
779 (skip-to-native source)))
780 (:|nsName|
781 (unless *ns-name-allowed-p*
782 (rng-error source "nsname now permitted in except"))
783 (let ((uri *namespace-uri*)
784 (*any-name-allowed-p* nil)
785 (*ns-name-allowed-p* nil))
786 (when (and *attribute-namespace-p*
787 (equal uri "http://www.w3.org/2000/xmlns"))
788 (rng-error source "namespace attribute not permitted"))
789 (klacks:consume source)
790 (prog1
791 (make-ns-name uri (p/except-name-class? source))
792 (skip-to-native source))))
793 (:|choice|
794 (klacks:consume source)
795 (simplify-nc-choice (p/name-class* source)))
797 (rng-error source "invalid child in except"))))))
799 (defun p/name-class* (source)
800 (let ((results nil))
801 (loop
802 (skip-to-native source)
803 (case (klacks:peek source)
804 (:start-element (push (p/name-class source) results))
805 (:end-element (return)))
806 (klacks:consume source))
807 (nreverse results)))
809 (defun p/except-name-class? (source)
810 (skip-to-native source)
811 (multiple-value-bind (key uri lname)
812 (klacks:peek source)
814 (if (and (eq key :start-element)
815 (string= (find-symbol lname :keyword) "except"))
816 (p/except-name-class source)
817 nil)))
819 (defun p/except-name-class (source)
820 (klacks:expecting-element (source "except")
821 (with-library-and-ns (klacks:list-attributes source)
822 (klacks:consume source)
823 (let ((x (p/name-class* source)))
824 (if (cdr x)
825 (simplify-nc-choice x)
826 (car x))))))
828 (defun escape-uri (string)
829 (with-output-to-string (out)
830 (loop for c across (cxml::rod-to-utf8-string string) do
831 (let ((code (char-code c)))
832 ;; http://www.w3.org/TR/xlink/#link-locators
833 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
834 (format out "%~2,'0X" code)
835 (write-char c out))))))
838 ;;;; unparsing
840 (defvar *definitions-to-names*)
841 (defvar *seen-names*)
843 (defun serialization-name (defn)
844 (or (gethash defn *definitions-to-names*)
845 (setf (gethash defn *definitions-to-names*)
846 (let ((name (if (gethash (defn-name defn) *seen-names*)
847 (format nil "~A-~D"
848 (defn-name defn)
849 (hash-table-count *seen-names*))
850 (defn-name defn))))
851 (setf (gethash name *seen-names*) defn)
852 name))))
854 (defun serialize-grammar (grammar sink)
855 (cxml:with-xml-output sink
856 (let ((*definitions-to-names* (make-hash-table))
857 (*seen-names* (make-hash-table :test 'equal)))
858 (cxml:with-element "grammar"
859 (cxml:with-element "start"
860 (serialize-pattern (parsed-grammar-pattern grammar)))
861 (loop for defn being each hash-key in *definitions-to-names* do
862 (serialize-definition defn))))))
864 (defun serialize-pattern (pattern)
865 (etypecase pattern
866 (element
867 (cxml:with-element "element"
868 (serialize-name (pattern-name pattern))
869 (serialize-pattern (pattern-child pattern))))
870 (attribute
871 (cxml:with-element "attribute"
872 (serialize-name (pattern-name pattern))
873 (serialize-pattern (pattern-child pattern))))
874 (%combination
875 (cxml:with-element
876 (etypecase pattern
877 (group "group")
878 (interleave "interleave")
879 (choice "choice"))
880 (serialize-pattern (pattern-a pattern))
881 (serialize-pattern (pattern-b pattern))))
882 (one-or-more
883 (cxml:with-element "oneOrmore"
884 (serialize-pattern (pattern-child pattern))))
885 (list-pattern
886 (cxml:with-element "list"
887 (serialize-pattern (pattern-child pattern))))
888 (ref
889 (cxml:with-element "ref"
890 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
891 (empty
892 (cxml:with-element "empty"))
893 (not-allowed
894 (cxml:with-element "notAllowed"))
895 (text
896 (cxml:with-element "text"))
897 (value
898 (cxml:with-element "value"
899 (cxml:attribute "datatype-library"
900 (pattern-datatype-library pattern))
901 (cxml:attribute "type" (pattern-type pattern))
902 (cxml:attribute "ns" (pattern-ns pattern))
903 (cxml:text (pattern-string pattern))))
904 (data
905 (cxml:with-element "value"
906 (cxml:attribute "datatype-library"
907 (pattern-datatype-library pattern))
908 (cxml:attribute "type" (pattern-type pattern))
909 (dolist (param (pattern-params pattern))
910 (cxml:with-element "param"
911 (cxml:attribute "name" (param-name param))
912 (cxml:text (param-string param))))
913 (when (pattern-except pattern)
914 (cxml:with-element "except"
915 (serialize-pattern (pattern-except pattern))))))))
917 (defun serialize-definition (defn)
918 (cxml:with-element "define"
919 (cxml:attribute "name" (serialization-name defn))
920 (serialize-pattern (defn-child defn))))
922 (defun serialize-name (name)
923 (etypecase name
924 (name
925 (cxml:with-element "name"
926 (cxml:attribute "ns" (name-uri name))
927 (cxml:text (name-lname name))))
928 (any-name
929 (cxml:with-element "anyName"
930 (when (any-name-except name)
931 (serialize-except-name (any-name-except name)))))
932 (ns-name
933 (cxml:with-element "anyName"
934 (cxml:attribute "ns" (ns-name-uri name))
935 (when (ns-name-except name)
936 (serialize-except-name (ns-name-except name)))))
937 (name-class-choice
938 (cxml:with-element "choice"
939 (serialize-name (name-class-choice-a name))
940 (serialize-name (name-class-choice-b name))))))
942 (defun serialize-except-name (spec)
943 (cxml:with-element "except"
944 (serialize-name (cdr spec))))
947 ;;;; simplification
949 ;;; 4.1 Annotations
950 ;;; Foreign attributes and elements are removed implicitly while parsing.
952 ;;; 4.2 Whitespace
953 ;;; All character data is discarded while parsing (which can only be
954 ;;; whitespace after validation).
956 ;;; Whitespace in name, type, and combine attributes is stripped while
957 ;;; parsing. Ditto for <name/>.
959 ;;; 4.3. datatypeLibrary attribute
960 ;;; Escaping is done by p/pattern.
961 ;;; Attribute value defaulting is done using *datatype-library*; only
962 ;;; p/data and p/value record the computed value.
964 ;;; 4.4. type attribute of value element
965 ;;; Done by p/value.
967 ;;; 4.5. href attribute
968 ;;; Escaping is done by process-include and p/external-ref.
970 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
971 ;;; but that requires xstream hacking.
973 ;;; 4.6. externalRef element
974 ;;; Done by p/external-ref.
976 ;;; 4.7. include element
977 ;;; Done by process-include.
979 ;;; 4.8. name attribute of element and attribute elements
980 ;;; `name' is stored as a slot, not a child. Done by p/element and
981 ;;; p/attribute.
983 ;;; 4.9. ns attribute
984 ;;; done by p/name-class, p/value, p/element, p/attribute
986 ;;; 4.10. QNames
987 ;;; done by p/name-class
989 ;;; 4.11. div element
990 ;;; Legen wir gar nicht erst an.
992 ;;; 4.12. 4.13 4.14 4.15
993 ;;; beim anlegen
995 ;;; 4.16
996 ;;; p/name-class
997 ;;; -- ausser der sache mit den datentypen
999 ;;; 4.17, 4.18, 4.19
1000 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
1001 ;;; beschrieben.
1003 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
1004 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
1005 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
1006 ;;; dafuer beim Serialisieren um.
1008 (defmethod check-recursion ((pattern element) depth)
1009 (check-recursion (pattern-child pattern) (1+ depth)))
1011 (defmethod check-recursion ((pattern ref) depth)
1012 (when (eql (pattern-crdepth pattern) depth)
1013 (rng-error nil "infinite recursion in ~A"
1014 (defn-name (pattern-target pattern))))
1015 (when (null (pattern-crdepth pattern))
1016 (setf (pattern-crdepth pattern) depth)
1017 (check-recursion (defn-child (pattern-target pattern)) depth)
1018 (setf (pattern-crdepth pattern) t)))
1020 (defmethod check-recursion ((pattern %parent) depth)
1021 (check-recursion (pattern-child pattern) depth))
1023 (defmethod check-recursion ((pattern %combination) depth)
1024 (check-recursion (pattern-a pattern) depth)
1025 (check-recursion (pattern-b pattern) depth))
1027 (defmethod check-recursion ((pattern %leaf) depth)
1028 (declare (ignore depth)))
1030 (defmethod check-recursion ((pattern data) depth)
1031 (when (pattern-except pattern)
1032 (check-recursion (pattern-except pattern) depth)))
1035 ;;;; 4.20
1037 ;;; %PARENT
1039 (defmethod fold-not-allowed ((pattern element))
1040 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1041 pattern)
1043 (defmethod fold-not-allowed ((pattern %parent))
1044 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1045 (if (typep (pattern-child pattern) 'not-allowed)
1046 (pattern-child pattern)
1047 pattern))
1049 ;;; %COMBINATION
1051 (defmethod fold-not-allowed ((pattern %combination))
1052 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
1053 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
1054 pattern)
1056 (defmethod fold-not-allowed ((pattern group))
1057 (call-next-method)
1058 (cond
1059 ;; remove if any child is not allowed
1060 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1061 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1062 (t pattern)))
1064 (defmethod fold-not-allowed ((pattern interleave))
1065 (call-next-method)
1066 (cond
1067 ;; remove if any child is not allowed
1068 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1069 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1070 (t pattern)))
1072 (defmethod fold-not-allowed ((pattern choice))
1073 (call-next-method)
1074 (cond
1075 ;; if any child is not allowed, choose the other
1076 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1077 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1078 (t pattern)))
1080 ;;; LEAF
1082 (defmethod fold-not-allowed ((pattern %leaf))
1083 pattern)
1085 (defmethod fold-not-allowed ((pattern data))
1086 (when (pattern-except pattern)
1087 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1088 (when (typep (pattern-except pattern) 'not-allowed)
1089 (setf (pattern-except pattern) nil)))
1090 pattern)
1092 ;;; REF
1094 (defmethod fold-not-allowed ((pattern ref))
1095 pattern)
1098 ;;;; 4.21
1100 ;;; %PARENT
1102 (defmethod fold-empty ((pattern one-or-more))
1103 (call-next-method)
1104 (if (typep (pattern-child pattern) 'empty)
1105 (pattern-child pattern)
1106 pattern))
1108 (defmethod fold-empty ((pattern %parent))
1109 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1110 pattern)
1112 ;;; %COMBINATION
1114 (defmethod fold-empty ((pattern %combination))
1115 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1116 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1117 pattern)
1119 (defmethod fold-empty ((pattern group))
1120 (call-next-method)
1121 (cond
1122 ;; if any child is empty, choose the other
1123 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1124 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1125 (t pattern)))
1127 (defmethod fold-empty ((pattern interleave))
1128 (call-next-method)
1129 (cond
1130 ;; if any child is empty, choose the other
1131 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1132 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1133 (t pattern)))
1135 (defmethod fold-empty ((pattern choice))
1136 (call-next-method)
1137 (if (typep (pattern-b pattern) 'empty)
1138 (cond
1139 ((typep (pattern-a pattern) 'empty)
1140 (pattern-a pattern))
1142 (rotatef (pattern-a pattern) (pattern-b pattern))
1143 pattern))
1144 pattern))
1146 ;;; LEAF
1148 (defmethod fold-empty ((pattern %leaf))
1149 pattern)
1151 (defmethod fold-empty ((pattern data))
1152 (when (pattern-except pattern)
1153 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1154 pattern)
1156 ;;; REF
1158 (defmethod fold-empty ((pattern ref))
1159 pattern)