119
[cxml-rng.git] / parse.lisp
blobd88aacd414e82a478580765c3043bf9431381634
1 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 (in-package :cxml-rng)
29 #+sbcl
30 (declaim (optimize (debug 2)))
33 ;;;; Errors
35 (define-condition rng-error (simple-error) ())
37 (defun rng-error (source fmt &rest args)
38 (let ((s (make-string-output-stream)))
39 (apply #'format s fmt args)
40 (when source
41 (etypecase source
42 (klacks:source
43 (format s "~& [ Error at line ~D, column ~D in ~S ]"
44 (klacks:current-line-number source)
45 (klacks:current-column-number source)
46 (klacks:current-system-id source)))
47 (sax:sax-parser-mixin
48 (format s "~& [ Error at line ~D, column ~D in ~S ]"
49 (sax:line-number source)
50 (sax:column-number source)
51 (sax:system-id source))) ))
52 (error 'rng-error
53 :format-control "~A"
54 :format-arguments (list (get-output-stream-string s)))))
57 ;;;; Parser
59 (defvar *datatype-library*)
60 (defvar *namespace-uri*)
61 (defvar *ns*)
62 (defvar *entity-resolver*)
63 (defvar *external-href-stack*)
64 (defvar *include-uri-stack*)
65 (defvar *include-body-p* nil)
66 (defvar *grammar*)
68 (defvar *debug* nil)
70 (defstruct (parsed-grammar (:constructor make-parsed-grammar (pattern)))
71 (pattern (missing) :type pattern)
72 (interned-start nil :type (or null pattern))
73 (registratur nil :type (or null hash-table)))
75 (defmethod print-object ((object parsed-grammar) stream)
76 (print-unreadable-object (object stream :type t :identity t)))
78 (defun invoke-with-klacks-handler (fn source)
79 (if *debug*
80 (funcall fn)
81 (handler-case
82 (funcall fn)
83 (cxml:xml-parse-error (c)
84 (rng-error source "Cannot parse schema: ~A" c)))))
86 (defvar *validate-grammar* t)
87 (defparameter *relax-ng-grammar* nil)
89 (defun make-validating-source (input)
90 (let ((upstream (cxml:make-source input)))
91 (if *validate-grammar*
92 (klacks:make-tapping-source upstream
93 (make-validator *relax-ng-grammar*))
94 upstream)))
96 (defun parse-relax-ng (input &key entity-resolver)
97 (when *validate-grammar*
98 (unless *relax-ng-grammar*
99 (setf *relax-ng-grammar*
100 (let* ((*validate-grammar* nil)
101 (d (slot-value (asdf:find-system :cxml-rng)
102 'asdf::relative-pathname)))
103 (parse-relax-ng (merge-pathnames "rng.rng" d))))))
104 (klacks:with-open-source (source (make-validating-source input))
105 (invoke-with-klacks-handler
106 (lambda ()
107 (klacks:find-event source :start-element)
108 (let* ((*datatype-library* "")
109 (*namespace-uri* "")
110 (*entity-resolver* entity-resolver)
111 (*external-href-stack* '())
112 (*include-uri-stack* '())
113 (*grammar* (make-grammar nil))
114 (result (p/pattern source)))
115 (unless result
116 (rng-error nil "empty grammar"))
117 (setf (grammar-start *grammar*)
118 (make-definition :name :start :child result))
119 (check-pattern-definitions source *grammar*)
120 (check-recursion result 0)
121 (setf result (fold-not-allowed result))
122 (setf result (fold-empty result))
123 (make-parsed-grammar result)))
124 source)))
127 ;;;; pattern structures
129 (defstruct pattern)
131 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
132 child)
134 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
135 name)
136 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-")))
137 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-")))
139 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
140 a b)
141 (defstruct (group
142 (:include %combination)
143 (:constructor make-group (a b))))
144 (defstruct (interleave
145 (:include %combination)
146 (:constructor make-interleave (a b))))
147 (defstruct (choice
148 (:include %combination)
149 (:constructor make-choice (a b))))
150 (defstruct (after
151 (:include %combination)
152 (:constructor make-after (a b))))
154 (defstruct (one-or-more
155 (:include %parent)
156 (:constructor make-one-or-more (child))))
157 (defstruct (list-pattern
158 (:include %parent)
159 (:constructor make-list-pattern (child))))
161 (defstruct (ref
162 (:include pattern)
163 (:conc-name "PATTERN-")
164 (:constructor make-ref (target)))
165 crdepth
166 target)
168 (defstruct (%leaf (:include pattern)))
170 (defstruct (empty (:include %leaf) (:conc-name "PATTERN-")))
171 (defstruct (text (:include %leaf) (:conc-name "PATTERN-")))
173 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
174 type)
176 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
178 string
179 value)
181 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
182 params
183 except)
185 (defstruct (not-allowed (:include %leaf) (:conc-name "PATTERN-")))
188 ;;;; non-pattern
190 (defstruct (grammar (:constructor make-grammar (parent)))
191 (start nil)
192 parent
193 (definitions (make-hash-table :test 'equal)))
195 (defstruct param
196 name
197 string)
199 ;; Clark calls this structure "RefPattern"
200 (defstruct (definition (:conc-name "DEFN-"))
201 name
202 combine-method
203 head-p
204 redefinition
205 child)
208 ;;; name-class
210 (defun missing ()
211 (error "missing arg"))
213 (defstruct name-class)
215 (defstruct (any-name (:include name-class)
216 (:constructor make-any-name (except)))
217 (except (missing) :type (or null name-class)))
219 (defstruct (name (:include name-class)
220 (:constructor make-name (uri lname)))
221 (uri (missing) :type string)
222 (lname (missing) :type string))
224 (defstruct (ns-name (:include name-class)
225 (:constructor make-ns-name (uri except)))
226 (uri (missing) :type string)
227 (except (missing) :type (or null name-class)))
229 (defstruct (name-class-choice (:include name-class)
230 (:constructor make-name-class-choice (a b)))
231 (a (missing) :type name-class)
232 (b (missing) :type name-class))
234 (defun simplify-nc-choice (values)
235 (zip #'make-name-class-choice values))
238 ;;;; parser
240 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
242 (defun skip-foreign* (source)
243 (loop
244 (case (klacks:peek-next source)
245 (:start-element (skip-foreign source))
246 (:end-element (return)))))
248 (defun skip-to-native (source)
249 (loop
250 (case (klacks:peek source)
251 (:start-element
252 (when (equal (klacks:current-uri source) *rng-namespace*)
253 (return))
254 (klacks:serialize-element source nil))
255 (:end-element (return)))
256 (klacks:consume source)))
258 (defun consume-and-skip-to-native (source)
259 (klacks:consume source)
260 (skip-to-native source))
262 (defun skip-foreign (source)
263 (when (equal (klacks:current-uri source) *rng-namespace*)
264 (rng-error source
265 "invalid schema: ~A not allowed here"
266 (klacks:current-lname source)))
267 (klacks:serialize-element source nil))
269 (defun attribute (lname attrs)
270 (let ((a (sax:find-attribute-ns "" lname attrs)))
271 (if a
272 (sax:attribute-value a)
273 nil)))
275 (defparameter *whitespace*
276 (format nil "~C~C~C~C"
277 (code-char 9)
278 (code-char 32)
279 (code-char 13)
280 (code-char 10)))
282 (defun ntc (lname source-or-attrs)
283 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
284 (let* ((attrs
285 (if (listp source-or-attrs)
286 source-or-attrs
287 (klacks:list-attributes source-or-attrs)))
288 (a (sax:find-attribute-ns "" lname attrs)))
289 (if a
290 (string-trim *whitespace* (sax:attribute-value a))
291 nil)))
293 (defmacro with-library-and-ns (attrs &body body)
294 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
296 (defun invoke-with-library-and-ns (fn attrs)
297 (let* ((dl (attribute "datatypeLibrary" attrs))
298 (ns (attribute "ns" attrs))
299 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
300 (*namespace-uri* (or ns *namespace-uri*))
301 (*ns* ns))
302 (funcall fn)))
304 (defun p/pattern (source)
305 (let* ((lname (klacks:current-lname source))
306 (attrs (klacks:list-attributes source)))
307 (with-library-and-ns attrs
308 (case (find-symbol lname :keyword)
309 (:|element| (p/element source (ntc "name" attrs)))
310 (:|attribute| (p/attribute source (ntc "name" attrs)))
311 (:|group| (p/combination #'groupify source))
312 (:|interleave| (p/combination #'interleave-ify source))
313 (:|choice| (p/combination #'choice-ify source))
314 (:|optional| (p/optional source))
315 (:|zeroOrMore| (p/zero-or-more source))
316 (:|oneOrMore| (p/one-or-more source))
317 (:|list| (p/list source))
318 (:|mixed| (p/mixed source))
319 (:|ref| (p/ref source))
320 (:|parentRef| (p/parent-ref source))
321 (:|empty| (p/empty source))
322 (:|text| (p/text source))
323 (:|value| (p/value source))
324 (:|data| (p/data source))
325 (:|notAllowed| (p/not-allowed source))
326 (:|externalRef| (p/external-ref source))
327 (:|grammar| (p/grammar source))
328 (t (skip-foreign source))))))
330 (defun p/pattern+ (source)
331 (let ((children nil))
332 (loop
333 (case (klacks:peek source)
334 (:start-element
335 (let ((p (p/pattern source))) (when p (push p children))))
336 (:end-element
337 (return))
339 (klacks:consume source))))
340 (unless children
341 (rng-error source "empty element"))
342 (nreverse children)))
344 (defun p/pattern? (source)
345 (let ((result nil))
346 (loop
347 (skip-to-native source)
348 (case (klacks:peek source)
349 (:start-element
350 (when result
351 (rng-error source "at most one pattern expected here"))
352 (setf result (p/pattern source)))
353 (:end-element
354 (return))
356 (klacks:consume source))))
357 result))
359 (defun p/element (source name)
360 (klacks:expecting-element (source "element")
361 (let ((result (make-element)))
362 (consume-and-skip-to-native source)
363 (if name
364 (setf (pattern-name result) (destructure-name source name))
365 (setf (pattern-name result) (p/name-class source)))
366 (skip-to-native source)
367 (setf (pattern-child result) (groupify (p/pattern+ source)))
368 result)))
370 (defvar *attribute-namespace-p* nil)
372 (defun p/attribute (source name)
373 (klacks:expecting-element (source "attribute")
374 (let ((result (make-attribute)))
375 (consume-and-skip-to-native source)
376 (if name
377 (setf (pattern-name result)
378 (let ((*namespace-uri* (or *ns* "")))
379 (destructure-name source name)))
380 (setf (pattern-name result)
381 (let ((*attribute-namespace-p* t))
382 (p/name-class source))))
383 (skip-to-native source)
384 (setf (pattern-child result)
385 (or (p/pattern? source) (make-text)))
386 result)))
388 (defun p/combination (zipper source)
389 (klacks:expecting-element (source)
390 (consume-and-skip-to-native source)
391 (funcall zipper (p/pattern+ source))))
393 (defun p/one-or-more (source)
394 (klacks:expecting-element (source "oneOrMore")
395 (consume-and-skip-to-native source)
396 (let ((children (p/pattern+ source)))
397 (make-one-or-more (groupify children)))))
399 (defun p/zero-or-more (source)
400 (klacks:expecting-element (source "zeroOrMore")
401 (consume-and-skip-to-native source)
402 (let ((children (p/pattern+ source)))
403 (make-choice (make-one-or-more (groupify children))
404 (make-empty)))))
406 (defun p/optional (source)
407 (klacks:expecting-element (source "optional")
408 (consume-and-skip-to-native source)
409 (let ((children (p/pattern+ source)))
410 (make-choice (groupify children) (make-empty)))))
412 (defun p/list (source)
413 (klacks:expecting-element (source "list")
414 (consume-and-skip-to-native source)
415 (let ((children (p/pattern+ source)))
416 (make-list-pattern (groupify children)))))
418 (defun p/mixed (source)
419 (klacks:expecting-element (source "mixed")
420 (consume-and-skip-to-native source)
421 (let ((children (p/pattern+ source)))
422 (make-interleave (groupify children) (make-text)))))
424 (defun p/ref (source)
425 (klacks:expecting-element (source "ref")
426 (prog1
427 (let* ((name (ntc "name" source))
428 (pdefinition
429 (or (find-definition name)
430 (setf (find-definition name)
431 (make-definition :name name :child nil)))))
432 (make-ref pdefinition))
433 (skip-foreign* source))))
435 (defun p/parent-ref (source)
436 (klacks:expecting-element (source "parentRef")
437 (prog1
438 (let* ((name (ntc "name" source))
439 (grammar (grammar-parent *grammar*))
440 (pdefinition
441 (or (find-definition name grammar)
442 (setf (find-definition name grammar)
443 (make-definition :name name :child nil)))))
444 (make-ref pdefinition))
445 (skip-foreign* source))))
447 (defun p/empty (source)
448 (klacks:expecting-element (source "empty")
449 (skip-foreign* source)
450 (make-empty)))
452 (defun p/text (source)
453 (klacks:expecting-element (source "text")
454 (skip-foreign* source)
455 (make-text)))
457 (defun consume-and-parse-characters (source)
458 ;; fixme
459 (let ((tmp ""))
460 (loop
461 (multiple-value-bind (key data) (klacks:peek-next source)
462 (case key
463 (:characters
464 (setf tmp (concatenate 'string tmp data)))
465 (:end-element (return)))))
466 tmp))
468 (defun p/value (source)
469 (klacks:expecting-element (source "value")
470 (let* ((type (ntc "type" source))
471 (string (consume-and-parse-characters source))
472 (ns *namespace-uri*)
473 (dl *datatype-library*))
474 (unless type
475 (setf type "token")
476 (setf dl ""))
477 (let ((data-type
478 (cxml-types:find-type (and dl (find-symbol dl :keyword)) type))
479 (vc (cxml-types:make-klacks-validation-context source)))
480 (unless data-type
481 (rng-error source "type not found: ~A/~A" type dl))
482 (make-value :string string
483 :value (cxml-types:parse data-type string vc)
484 :type data-type
485 :ns ns)))))
487 (defun p/data (source)
488 (klacks:expecting-element (source "data")
489 (let* ((type (ntc "type" source))
490 (params '())
491 (except nil))
492 (loop
493 (multiple-value-bind (key uri lname)
494 (klacks:peek-next source)
496 (case key
497 (:start-element
498 (case (find-symbol lname :keyword)
499 (:|param| (push (p/param source) params))
500 (:|except|
501 (setf except (p/except-pattern source))
502 (skip-to-native source)
503 (return))
504 (t (skip-foreign source))))
505 (:end-element
506 (return)))))
507 (setf params (nreverse params))
508 (let* ((dl *datatype-library*)
509 (data-type (apply #'cxml-types:find-type
510 (and dl (find-symbol dl :keyword))
511 type
512 (loop
513 for p in params
514 collect (find-symbol (string-invertcase
515 (param-name p))
516 :keyword)
517 collect (param-string p)))))
518 (unless data-type
519 (rng-error source "type not found: ~A/~A" type dl))
520 (make-data
521 :type data-type
522 :params params
523 :except except)))))
525 (defun string-invertcase (str)
526 (loop
527 with result = (copy-seq str)
528 for c across str
529 for i from 0
531 (setf (char result i)
532 (if (lower-case-p c)
533 (char-upcase c)
534 (char-downcase c)))
535 finally (return result)))
537 (defun p/param (source)
538 (klacks:expecting-element (source "param")
539 (let ((name (ntc "name" source))
540 (string (consume-and-parse-characters source)))
541 (make-param :name name :string string))))
543 (defun p/except-pattern (source)
544 (klacks:expecting-element (source "except")
545 (with-library-and-ns (klacks:list-attributes source)
546 (klacks:consume source)
547 (choice-ify (p/pattern+ source)))))
549 (defun p/not-allowed (source)
550 (klacks:expecting-element (source "notAllowed")
551 (consume-and-skip-to-native source)
552 (make-not-allowed)))
554 (defun safe-parse-uri (source str &optional base)
555 (when (zerop (length str))
556 (rng-error source "missing URI"))
557 (handler-case
558 (if base
559 (puri:merge-uris str base)
560 (puri:parse-uri str))
561 (puri:uri-parse-error ()
562 (rng-error source "invalid URI: ~A" str))))
564 (defun p/external-ref (source)
565 (klacks:expecting-element (source "externalRef")
566 (let* ((href
567 (escape-uri (attribute "href" (klacks:list-attributes source))))
568 (base (klacks:current-xml-base source))
569 (uri (safe-parse-uri source href base)))
570 (when (find uri *include-uri-stack* :test #'puri:uri=)
571 (rng-error source "looping include"))
572 (prog1
573 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
574 (xstream
575 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
576 (klacks:with-open-source (source (make-validating-source xstream))
577 (invoke-with-klacks-handler
578 (lambda ()
579 (klacks:find-event source :start-element)
580 (let ((*datatype-library* ""))
581 (p/pattern source)))
582 source)))
583 (skip-foreign* source)))))
585 (defun p/grammar (source &optional grammar)
586 (klacks:expecting-element (source "grammar")
587 (consume-and-skip-to-native source)
588 (let ((*grammar* (or grammar (make-grammar *grammar*)))
589 (includep grammar))
590 (process-grammar-content* source)
591 (unless (or includep (grammar-start *grammar*))
592 (rng-error source "no <start> in grammar"))
593 (unless includep
594 (check-pattern-definitions source *grammar*)
595 (defn-child (grammar-start *grammar*))))))
597 (defvar *include-start*)
598 (defvar *include-definitions*)
600 (defun process-grammar-content* (source &key disallow-include)
601 (loop
602 (multiple-value-bind (key uri lname) (klacks:peek source)
604 (case key
605 (:start-element
606 (with-library-and-ns (klacks:list-attributes source)
607 (case (find-symbol lname :keyword)
608 (:|start| (process-start source))
609 (:|define| (process-define source))
610 (:|div| (process-div source))
611 (:|include|
612 (when disallow-include
613 (rng-error source "nested include not permitted"))
614 (process-include source))
616 (skip-foreign source)))))
617 (:end-element
618 (return))))
619 (klacks:consume source)))
621 (defun process-start (source)
622 (klacks:expecting-element (source "start")
623 (let* ((combine0 (ntc "combine" source))
624 (combine
625 (when combine0
626 (find-symbol (string-upcase combine0) :keyword)))
627 (child
628 (progn
629 (consume-and-skip-to-native source)
630 (p/pattern source)))
631 (pdefinition (grammar-start *grammar*)))
632 (skip-foreign* source)
633 ;; fixme: shared code with process-define
634 (unless pdefinition
635 (setf pdefinition (make-definition :name :start :child nil))
636 (setf (grammar-start *grammar*) pdefinition))
637 (when *include-body-p*
638 (setf *include-start* pdefinition))
639 (cond
640 ((defn-child pdefinition)
641 (ecase (defn-redefinition pdefinition)
642 (:not-being-redefined
643 (when (and combine
644 (defn-combine-method pdefinition)
645 (not (eq combine
646 (defn-combine-method pdefinition))))
647 (rng-error source "conflicting combine values for <start>"))
648 (unless combine
649 (when (defn-head-p pdefinition)
650 (rng-error source "multiple definitions for <start>"))
651 (setf (defn-head-p pdefinition) t))
652 (unless (defn-combine-method pdefinition)
653 (setf (defn-combine-method pdefinition) combine))
654 (setf (defn-child pdefinition)
655 (case (defn-combine-method pdefinition)
656 (:choice
657 (make-choice (defn-child pdefinition) child))
658 (:interleave
659 (make-interleave (defn-child pdefinition) child)))))
660 (:being-redefined-and-no-original
661 (setf (defn-redefinition pdefinition)
662 :being-redefined-and-original))
663 (:being-redefined-and-original)))
665 (setf (defn-child pdefinition) child)
666 (setf (defn-combine-method pdefinition) combine)
667 (setf (defn-head-p pdefinition) (null combine))
668 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
670 (defun zip (constructor children)
671 (cond
672 ((null children)
673 (rng-error nil "empty choice?"))
674 ((null (cdr children))
675 (car children))
677 (destructuring-bind (a b &rest rest)
678 children
679 (zip constructor (cons (funcall constructor a b) rest))))))
681 (defun choice-ify (children) (zip #'make-choice children))
682 (defun groupify (children) (zip #'make-group children))
683 (defun interleave-ify (children) (zip #'make-interleave children))
685 (defun find-definition (name &optional (grammar *grammar*))
686 (gethash name (grammar-definitions grammar)))
688 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
689 (setf (gethash name (grammar-definitions grammar)) newval))
691 (defun process-define (source)
692 (klacks:expecting-element (source "define")
693 (let* ((name (ntc "name" source))
694 (combine0 (ntc "combine" source))
695 (combine (when combine0
696 (find-symbol (string-upcase combine0) :keyword)))
697 (child (groupify
698 (progn
699 (consume-and-skip-to-native source)
700 (p/pattern+ source))))
701 (pdefinition (find-definition name)))
702 (unless pdefinition
703 (setf pdefinition (make-definition :name name :child nil))
704 (setf (find-definition name) pdefinition))
705 (when *include-body-p*
706 (push pdefinition *include-definitions*))
707 (cond
708 ((defn-child pdefinition)
709 (case (defn-redefinition pdefinition)
710 (:not-being-redefined
711 (when (and combine
712 (defn-combine-method pdefinition)
713 (not (eq combine
714 (defn-combine-method pdefinition))))
715 (rng-error source "conflicting combine values for ~A" name))
716 (unless combine
717 (when (defn-head-p pdefinition)
718 (rng-error source "multiple definitions for ~A" name))
719 (setf (defn-head-p pdefinition) t))
720 (unless (defn-combine-method pdefinition)
721 (setf (defn-combine-method pdefinition) combine))
722 (setf (defn-child pdefinition)
723 (case (defn-combine-method pdefinition)
724 (:choice
725 (make-choice (defn-child pdefinition) child))
726 (:interleave
727 (make-interleave (defn-child pdefinition) child)))))
728 (:being-redefined-and-no-original
729 (setf (defn-redefinition pdefinition)
730 :being-redefined-and-original))
731 (:being-redefined-and-original)))
733 (setf (defn-child pdefinition) child)
734 (setf (defn-combine-method pdefinition) combine)
735 (setf (defn-head-p pdefinition) (null combine))
736 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
738 (defun process-div (source)
739 (klacks:expecting-element (source "div")
740 (consume-and-skip-to-native source)
741 (process-grammar-content* source)))
743 (defun reset-definition-for-include (defn)
744 (setf (defn-combine-method defn) nil)
745 (setf (defn-redefinition defn) :being-redefined-and-no-original)
746 (setf (defn-head-p defn) nil))
748 (defun restore-definition (defn original)
749 (setf (defn-combine-method defn) (defn-combine-method original))
750 (setf (defn-redefinition defn) (defn-redefinition original))
751 (setf (defn-head-p defn) (defn-head-p original)))
753 (defun process-include (source)
754 (klacks:expecting-element (source "include")
755 (let* ((href
756 (escape-uri (attribute "href" (klacks:list-attributes source))))
757 (base (klacks:current-xml-base source))
758 (uri (safe-parse-uri source href base))
759 (*include-start* nil)
760 (*include-definitions* '()))
761 (consume-and-skip-to-native source)
762 (let ((*include-body-p* t))
763 (process-grammar-content* source :disallow-include t))
764 (let ((tmp-start
765 (when *include-start*
766 (prog1
767 (copy-structure *include-start*)
768 (reset-definition-for-include *include-start*))))
769 (tmp-defns
770 (loop
771 for defn in *include-definitions*
772 collect
773 (prog1
774 (copy-structure defn)
775 (reset-definition-for-include defn)))))
776 (when (find uri *include-uri-stack* :test #'puri:uri=)
777 (rng-error source "looping include"))
778 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
779 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri)))
780 (klacks:with-open-source (source (make-validating-source xstream))
781 (invoke-with-klacks-handler
782 (lambda ()
783 (klacks:find-event source :start-element)
784 (let ((*datatype-library* ""))
785 (p/grammar source *grammar*)))
786 source))
787 (when tmp-start
788 (when (eq (defn-redefinition *include-start*)
789 :being-redefined-and-no-original)
790 (rng-error source "start not found in redefinition of grammar"))
791 (restore-definition *include-start* tmp-start))
792 (dolist (copy tmp-defns)
793 (let ((defn (gethash (defn-name copy)
794 (grammar-definitions *grammar*))))
795 (when (eq (defn-redefinition defn)
796 :being-redefined-and-no-original)
797 (rng-error source "redefinition not found in grammar"))
798 (restore-definition defn copy)))
799 nil)))))
801 (defun check-pattern-definitions (source grammar)
802 (when (and (grammar-start grammar)
803 (eq (defn-redefinition (grammar-start grammar))
804 :being-redefined-and-no-original))
805 (rng-error source "start not found in redefinition of grammar"))
806 (loop for defn being each hash-value in (grammar-definitions grammar) do
807 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
808 (rng-error source "redefinition not found in grammar"))
809 (unless (defn-child defn)
810 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
812 (defvar *any-name-allowed-p* t)
813 (defvar *ns-name-allowed-p* t)
815 (defun destructure-name (source qname)
816 (multiple-value-bind (uri lname)
817 (klacks:decode-qname qname source)
818 (setf uri (or uri *namespace-uri*))
819 (when (and *attribute-namespace-p*
820 (or (and (equal lname "xmlns") (equal uri ""))
821 (equal uri "http://www.w3.org/2000/xmlns")))
822 (rng-error source "namespace attribute not permitted"))
823 (make-name uri lname)))
825 (defun p/name-class (source)
826 (klacks:expecting-element (source)
827 (with-library-and-ns (klacks:list-attributes source)
828 (case (find-symbol (klacks:current-lname source) :keyword)
829 (:|name|
830 (let ((qname (string-trim *whitespace*
831 (consume-and-parse-characters source))))
832 (destructure-name source qname)))
833 (:|anyName|
834 (unless *any-name-allowed-p*
835 (rng-error source "anyname now permitted in except"))
836 (klacks:consume source)
837 (prog1
838 (let ((*any-name-allowed-p* nil))
839 (make-any-name (p/except-name-class? source)))
840 (skip-to-native source)))
841 (:|nsName|
842 (unless *ns-name-allowed-p*
843 (rng-error source "nsname now permitted in except"))
844 (let ((uri *namespace-uri*)
845 (*any-name-allowed-p* nil)
846 (*ns-name-allowed-p* nil))
847 (when (and *attribute-namespace-p*
848 (equal uri "http://www.w3.org/2000/xmlns"))
849 (rng-error source "namespace attribute not permitted"))
850 (klacks:consume source)
851 (prog1
852 (make-ns-name uri (p/except-name-class? source))
853 (skip-to-native source))))
854 (:|choice|
855 (klacks:consume source)
856 (simplify-nc-choice (p/name-class* source)))
858 (rng-error source "invalid child in except"))))))
860 (defun p/name-class* (source)
861 (let ((results nil))
862 (loop
863 (skip-to-native source)
864 (case (klacks:peek source)
865 (:start-element (push (p/name-class source) results))
866 (:end-element (return)))
867 (klacks:consume source))
868 (nreverse results)))
870 (defun p/except-name-class? (source)
871 (skip-to-native source)
872 (multiple-value-bind (key uri lname)
873 (klacks:peek source)
875 (if (and (eq key :start-element)
876 (string= (find-symbol lname :keyword) "except"))
877 (p/except-name-class source)
878 nil)))
880 (defun p/except-name-class (source)
881 (klacks:expecting-element (source "except")
882 (with-library-and-ns (klacks:list-attributes source)
883 (klacks:consume source)
884 (let ((x (p/name-class* source)))
885 (if (cdr x)
886 (simplify-nc-choice x)
887 (car x))))))
889 (defun escape-uri (string)
890 (with-output-to-string (out)
891 (loop for c across (cxml::rod-to-utf8-string string) do
892 (let ((code (char-code c)))
893 ;; http://www.w3.org/TR/xlink/#link-locators
894 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
895 (format out "%~2,'0X" code)
896 (write-char c out))))))
899 ;;;; unparsing
901 (defvar *definitions-to-names*)
902 (defvar *seen-names*)
904 (defun serialization-name (defn)
905 (or (gethash defn *definitions-to-names*)
906 (setf (gethash defn *definitions-to-names*)
907 (let ((name (if (gethash (defn-name defn) *seen-names*)
908 (format nil "~A-~D"
909 (defn-name defn)
910 (hash-table-count *seen-names*))
911 (defn-name defn))))
912 (setf (gethash name *seen-names*) defn)
913 name))))
915 (defun serialize-grammar (grammar sink)
916 (cxml:with-xml-output sink
917 (let ((*definitions-to-names* (make-hash-table))
918 (*seen-names* (make-hash-table :test 'equal)))
919 (cxml:with-element "grammar"
920 (cxml:with-element "start"
921 (serialize-pattern (parsed-grammar-pattern grammar)))
922 (loop for defn being each hash-key in *definitions-to-names* do
923 (serialize-definition defn))))))
925 (defun serialize-pattern (pattern)
926 (etypecase pattern
927 (element
928 (cxml:with-element "element"
929 (serialize-name (pattern-name pattern))
930 (serialize-pattern (pattern-child pattern))))
931 (attribute
932 (cxml:with-element "attribute"
933 (serialize-name (pattern-name pattern))
934 (serialize-pattern (pattern-child pattern))))
935 (%combination
936 (cxml:with-element
937 (etypecase pattern
938 (group "group")
939 (interleave "interleave")
940 (choice "choice"))
941 (serialize-pattern (pattern-a pattern))
942 (serialize-pattern (pattern-b pattern))))
943 (one-or-more
944 (cxml:with-element "oneOrmore"
945 (serialize-pattern (pattern-child pattern))))
946 (list-pattern
947 (cxml:with-element "list"
948 (serialize-pattern (pattern-child pattern))))
949 (ref
950 (cxml:with-element "ref"
951 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
952 (empty
953 (cxml:with-element "empty"))
954 (not-allowed
955 (cxml:with-element "notAllowed"))
956 (text
957 (cxml:with-element "text"))
958 (value
959 (cxml:with-element "value"
960 (let ((type (pattern-type pattern)))
961 (cxml:attribute "datatype-library"
962 (symbol-name (cxml-types:type-library type)))
963 (cxml:attribute "type" (cxml-types:type-name type)))
964 (cxml:attribute "ns" (pattern-ns pattern))
965 (cxml:text (pattern-string pattern))))
966 (data
967 (cxml:with-element "value"
968 (let ((type (pattern-type pattern)))
969 (cxml:attribute "datatype-library"
970 (symbol-name (cxml-types:type-library type)))
971 (cxml:attribute "type" (cxml-types:type-name type)))
972 (dolist (param (pattern-params pattern))
973 (cxml:with-element "param"
974 (cxml:attribute "name" (param-name param))
975 (cxml:text (param-string param))))
976 (when (pattern-except pattern)
977 (cxml:with-element "except"
978 (serialize-pattern (pattern-except pattern))))))))
980 (defun serialize-definition (defn)
981 (cxml:with-element "define"
982 (cxml:attribute "name" (serialization-name defn))
983 (serialize-pattern (defn-child defn))))
985 (defun serialize-name (name)
986 (etypecase name
987 (name
988 (cxml:with-element "name"
989 (cxml:attribute "ns" (name-uri name))
990 (cxml:text (name-lname name))))
991 (any-name
992 (cxml:with-element "anyName"
993 (when (any-name-except name)
994 (serialize-except-name (any-name-except name)))))
995 (ns-name
996 (cxml:with-element "anyName"
997 (cxml:attribute "ns" (ns-name-uri name))
998 (when (ns-name-except name)
999 (serialize-except-name (ns-name-except name)))))
1000 (name-class-choice
1001 (cxml:with-element "choice"
1002 (serialize-name (name-class-choice-a name))
1003 (serialize-name (name-class-choice-b name))))))
1005 (defun serialize-except-name (spec)
1006 (cxml:with-element "except"
1007 (serialize-name (cdr spec))))
1010 ;;;; simplification
1012 ;;; 4.1 Annotations
1013 ;;; Foreign attributes and elements are removed implicitly while parsing.
1015 ;;; 4.2 Whitespace
1016 ;;; All character data is discarded while parsing (which can only be
1017 ;;; whitespace after validation).
1019 ;;; Whitespace in name, type, and combine attributes is stripped while
1020 ;;; parsing. Ditto for <name/>.
1022 ;;; 4.3. datatypeLibrary attribute
1023 ;;; Escaping is done by p/pattern.
1024 ;;; Attribute value defaulting is done using *datatype-library*; only
1025 ;;; p/data and p/value record the computed value.
1027 ;;; 4.4. type attribute of value element
1028 ;;; Done by p/value.
1030 ;;; 4.5. href attribute
1031 ;;; Escaping is done by process-include and p/external-ref.
1033 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
1034 ;;; but that requires xstream hacking.
1036 ;;; 4.6. externalRef element
1037 ;;; Done by p/external-ref.
1039 ;;; 4.7. include element
1040 ;;; Done by process-include.
1042 ;;; 4.8. name attribute of element and attribute elements
1043 ;;; `name' is stored as a slot, not a child. Done by p/element and
1044 ;;; p/attribute.
1046 ;;; 4.9. ns attribute
1047 ;;; done by p/name-class, p/value, p/element, p/attribute
1049 ;;; 4.10. QNames
1050 ;;; done by p/name-class
1052 ;;; 4.11. div element
1053 ;;; Legen wir gar nicht erst an.
1055 ;;; 4.12. 4.13 4.14 4.15
1056 ;;; beim anlegen
1058 ;;; 4.16
1059 ;;; p/name-class
1060 ;;; -- ausser der sache mit den datentypen
1062 ;;; 4.17, 4.18, 4.19
1063 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
1064 ;;; beschrieben.
1066 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
1067 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
1068 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
1069 ;;; dafuer beim Serialisieren um.
1071 (defmethod check-recursion ((pattern element) depth)
1072 (check-recursion (pattern-child pattern) (1+ depth)))
1074 (defmethod check-recursion ((pattern ref) depth)
1075 (when (eql (pattern-crdepth pattern) depth)
1076 (rng-error nil "infinite recursion in ~A"
1077 (defn-name (pattern-target pattern))))
1078 (when (null (pattern-crdepth pattern))
1079 (setf (pattern-crdepth pattern) depth)
1080 (check-recursion (defn-child (pattern-target pattern)) depth)
1081 (setf (pattern-crdepth pattern) t)))
1083 (defmethod check-recursion ((pattern %parent) depth)
1084 (check-recursion (pattern-child pattern) depth))
1086 (defmethod check-recursion ((pattern %combination) depth)
1087 (check-recursion (pattern-a pattern) depth)
1088 (check-recursion (pattern-b pattern) depth))
1090 (defmethod check-recursion ((pattern %leaf) depth)
1091 (declare (ignore depth)))
1093 (defmethod check-recursion ((pattern data) depth)
1094 (when (pattern-except pattern)
1095 (check-recursion (pattern-except pattern) depth)))
1098 ;;;; 4.20
1100 ;;; %PARENT
1102 (defmethod fold-not-allowed ((pattern element))
1103 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1104 pattern)
1106 (defmethod fold-not-allowed ((pattern %parent))
1107 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1108 (if (typep (pattern-child pattern) 'not-allowed)
1109 (pattern-child pattern)
1110 pattern))
1112 ;;; %COMBINATION
1114 (defmethod fold-not-allowed ((pattern %combination))
1115 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
1116 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
1117 pattern)
1119 (defmethod fold-not-allowed ((pattern group))
1120 (call-next-method)
1121 (cond
1122 ;; remove if any child is not allowed
1123 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1124 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1125 (t pattern)))
1127 (defmethod fold-not-allowed ((pattern interleave))
1128 (call-next-method)
1129 (cond
1130 ;; remove if any child is not allowed
1131 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1132 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1133 (t pattern)))
1135 (defmethod fold-not-allowed ((pattern choice))
1136 (call-next-method)
1137 (cond
1138 ;; if any child is not allowed, choose the other
1139 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1140 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1141 (t pattern)))
1143 ;;; LEAF
1145 (defmethod fold-not-allowed ((pattern %leaf))
1146 pattern)
1148 (defmethod fold-not-allowed ((pattern data))
1149 (when (pattern-except pattern)
1150 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1151 (when (typep (pattern-except pattern) 'not-allowed)
1152 (setf (pattern-except pattern) nil)))
1153 pattern)
1155 ;;; REF
1157 (defmethod fold-not-allowed ((pattern ref))
1158 pattern)
1161 ;;;; 4.21
1163 ;;; %PARENT
1165 (defmethod fold-empty ((pattern one-or-more))
1166 (call-next-method)
1167 (if (typep (pattern-child pattern) 'empty)
1168 (pattern-child pattern)
1169 pattern))
1171 (defmethod fold-empty ((pattern %parent))
1172 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1173 pattern)
1175 ;;; %COMBINATION
1177 (defmethod fold-empty ((pattern %combination))
1178 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1179 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1180 pattern)
1182 (defmethod fold-empty ((pattern group))
1183 (call-next-method)
1184 (cond
1185 ;; if any child is empty, choose the other
1186 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1187 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1188 (t pattern)))
1190 (defmethod fold-empty ((pattern interleave))
1191 (call-next-method)
1192 (cond
1193 ;; if any child is empty, choose the other
1194 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1195 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1196 (t pattern)))
1198 (defmethod fold-empty ((pattern choice))
1199 (call-next-method)
1200 (if (typep (pattern-b pattern) 'empty)
1201 (cond
1202 ((typep (pattern-a pattern) 'empty)
1203 (pattern-a pattern))
1205 (rotatef (pattern-a pattern) (pattern-b pattern))
1206 pattern))
1207 pattern))
1209 ;;; LEAF
1211 (defmethod fold-empty ((pattern %leaf))
1212 pattern)
1214 (defmethod fold-empty ((pattern data))
1215 (when (pattern-except pattern)
1216 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1217 pattern)
1219 ;;; REF
1221 (defmethod fold-empty ((pattern ref))
1222 pattern)