auf cxml-types umgestellt
[cxml-rng.git] / parse.lisp
blob3cd5a76b07a0c579ca037592b43d0df098a2c1a1
1 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
5 ;;; are met:
6 ;;;
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
9 ;;;
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
14 ;;;
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 (in-package :cxml-rng)
29 #+sbcl
30 (declaim (optimize (debug 2)))
33 ;;;; Errors
35 (define-condition rng-error (simple-error) ())
37 (defun rng-error (source fmt &rest args)
38 (let ((s (make-string-output-stream)))
39 (apply #'format s fmt args)
40 (when source
41 (etypecase source
42 (klacks:source
43 (format s "~& [ Error at line ~D, column ~D in ~S ]"
44 (klacks:current-line-number source)
45 (klacks:current-column-number source)
46 (klacks:current-system-id source)))
47 (sax:sax-parser-mixin
48 (format s "~& [ Error at line ~D, column ~D in ~S ]"
49 (sax:line-number source)
50 (sax:column-number source)
51 (sax:system-id source))) ))
52 (error 'rng-error
53 :format-control "~A"
54 :format-arguments (list (get-output-stream-string s)))))
57 ;;;; Parser
59 (defvar *datatype-library*)
60 (defvar *namespace-uri*)
61 (defvar *ns*)
62 (defvar *entity-resolver*)
63 (defvar *external-href-stack*)
64 (defvar *include-uri-stack*)
65 (defvar *include-body-p* nil)
66 (defvar *grammar*)
68 (defvar *debug* nil)
70 (defstruct (parsed-grammar (:constructor make-parsed-grammar (pattern)))
71 (pattern (missing) :type pattern)
72 (interned-start nil :type (or null pattern))
73 (registratur nil :type (or null hash-table)))
75 (defun invoke-with-klacks-handler (fn source)
76 (if *debug*
77 (funcall fn)
78 (handler-case
79 (funcall fn)
80 (cxml:xml-parse-error (c)
81 (rng-error source "Cannot parse schema: ~A" c)))))
83 (defun parse-relax-ng (input &key entity-resolver)
84 (klacks:with-open-source (source (cxml:make-source input))
85 (invoke-with-klacks-handler
86 (lambda ()
87 (klacks:find-event source :start-element)
88 (let* ((*datatype-library* "")
89 (*namespace-uri* "")
90 (*entity-resolver* entity-resolver)
91 (*external-href-stack* '())
92 (*include-uri-stack* '())
93 (*grammar* (make-grammar nil))
94 (result (p/pattern source)))
95 (unless result
96 (rng-error nil "empty grammar"))
97 (setf (grammar-start *grammar*)
98 (make-definition :name :start :child result))
99 (check-pattern-definitions source *grammar*)
100 (check-recursion result 0)
101 (setf result (fold-not-allowed result))
102 (setf result (fold-empty result))
103 (make-parsed-grammar result)))
104 source)))
107 ;;;; pattern structures
109 (defstruct pattern)
111 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
112 child)
114 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
115 name)
116 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-")))
117 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-")))
119 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
120 a b)
121 (defstruct (group
122 (:include %combination)
123 (:constructor make-group (a b))))
124 (defstruct (interleave
125 (:include %combination)
126 (:constructor make-interleave (a b))))
127 (defstruct (choice
128 (:include %combination)
129 (:constructor make-choice (a b))))
130 (defstruct (after
131 (:include %combination)
132 (:constructor make-after (a b))))
134 (defstruct (one-or-more
135 (:include %parent)
136 (:constructor make-one-or-more (child))))
137 (defstruct (list-pattern
138 (:include %parent)
139 (:constructor make-list-pattern (child))))
141 (defstruct (ref
142 (:include pattern)
143 (:conc-name "PATTERN-")
144 (:constructor make-ref (target)))
145 crdepth
146 target)
148 (defstruct (%leaf (:include pattern)))
150 (defstruct (empty (:include %leaf) (:conc-name "PATTERN-")))
151 (defstruct (text (:include %leaf) (:conc-name "PATTERN-")))
153 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
154 type)
156 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
158 string)
160 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
161 params
162 except)
164 (defstruct (not-allowed (:include %leaf) (:conc-name "PATTERN-")))
167 ;;;; non-pattern
169 (defstruct (grammar (:constructor make-grammar (parent)))
170 (start nil)
171 parent
172 (definitions (make-hash-table :test 'equal)))
174 (defstruct param
175 name
176 string)
178 ;; Clark calls this structure "RefPattern"
179 (defstruct (definition (:conc-name "DEFN-"))
180 name
181 combine-method
182 head-p
183 redefinition
184 child)
187 ;;; name-class
189 (defun missing ()
190 (error "missing arg"))
192 (defstruct name-class)
194 (defstruct (any-name (:include name-class)
195 (:constructor make-any-name (except)))
196 (except (missing) :type (or null name-class)))
198 (defstruct (name (:include name-class)
199 (:constructor make-name (uri lname)))
200 (uri (missing) :type string)
201 (lname (missing) :type string))
203 (defstruct (ns-name (:include name-class)
204 (:constructor make-ns-name (uri except)))
205 (uri (missing) :type string)
206 (except (missing) :type (or null name-class)))
208 (defstruct (name-class-choice (:include name-class)
209 (:constructor make-name-class-choice (a b)))
210 (a (missing) :type name-class)
211 (b (missing) :type name-class))
213 (defun simplify-nc-choice (values)
214 (zip #'make-name-class-choice values))
217 ;;;; parser
219 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
221 (defun skip-foreign* (source)
222 (loop
223 (case (klacks:peek-next source)
224 (:start-element (skip-foreign source))
225 (:end-element (return)))))
227 (defun skip-to-native (source)
228 (loop
229 (case (klacks:peek source)
230 (:start-element
231 (when (equal (klacks:current-uri source) *rng-namespace*)
232 (return))
233 (klacks:serialize-element source nil))
234 (:end-element (return)))
235 (klacks:consume source)))
237 (defun consume-and-skip-to-native (source)
238 (klacks:consume source)
239 (skip-to-native source))
241 (defun skip-foreign (source)
242 (when (equal (klacks:current-uri source) *rng-namespace*)
243 (rng-error source
244 "invalid schema: ~A not allowed here"
245 (klacks:current-lname source)))
246 (klacks:serialize-element source nil))
248 (defun attribute (lname attrs)
249 (let ((a (sax:find-attribute-ns "" lname attrs)))
250 (if a
251 (sax:attribute-value a)
252 nil)))
254 (defparameter *whitespace*
255 (format nil "~C~C~C~C"
256 (code-char 9)
257 (code-char 32)
258 (code-char 13)
259 (code-char 10)))
261 (defun ntc (lname source-or-attrs)
262 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
263 (let* ((attrs
264 (if (listp source-or-attrs)
265 source-or-attrs
266 (klacks:list-attributes source-or-attrs)))
267 (a (sax:find-attribute-ns "" lname attrs)))
268 (if a
269 (string-trim *whitespace* (sax:attribute-value a))
270 nil)))
272 (defmacro with-library-and-ns (attrs &body body)
273 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
275 (defun invoke-with-library-and-ns (fn attrs)
276 (let* ((dl (attribute "datatypeLibrary" attrs))
277 (ns (attribute "ns" attrs))
278 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
279 (*namespace-uri* (or ns *namespace-uri*))
280 (*ns* ns))
281 (funcall fn)))
283 (defun p/pattern (source)
284 (let* ((lname (klacks:current-lname source))
285 (attrs (klacks:list-attributes source)))
286 (with-library-and-ns attrs
287 (case (find-symbol lname :keyword)
288 (:|element| (p/element source (ntc "name" attrs)))
289 (:|attribute| (p/attribute source (ntc "name" attrs)))
290 (:|group| (p/combination #'groupify source))
291 (:|interleave| (p/combination #'interleave-ify source))
292 (:|choice| (p/combination #'choice-ify source))
293 (:|optional| (p/optional source))
294 (:|zeroOrMore| (p/zero-or-more source))
295 (:|oneOrMore| (p/one-or-more source))
296 (:|list| (p/list source))
297 (:|mixed| (p/mixed source))
298 (:|ref| (p/ref source))
299 (:|parentRef| (p/parent-ref source))
300 (:|empty| (p/empty source))
301 (:|text| (p/text source))
302 (:|value| (p/value source))
303 (:|data| (p/data source))
304 (:|notAllowed| (p/not-allowed source))
305 (:|externalRef| (p/external-ref source))
306 (:|grammar| (p/grammar source))
307 (t (skip-foreign source))))))
309 (defun p/pattern+ (source)
310 (let ((children nil))
311 (loop
312 (case (klacks:peek source)
313 (:start-element
314 (let ((p (p/pattern source))) (when p (push p children))))
315 (:end-element
316 (return))
318 (klacks:consume source))))
319 (unless children
320 (rng-error source "empty element"))
321 (nreverse children)))
323 (defun p/pattern? (source)
324 (let ((result nil))
325 (loop
326 (skip-to-native source)
327 (case (klacks:peek source)
328 (:start-element
329 (when result
330 (rng-error source "at most one pattern expected here"))
331 (setf result (p/pattern source)))
332 (:end-element
333 (return))
335 (klacks:consume source))))
336 result))
338 (defun p/element (source name)
339 (klacks:expecting-element (source "element")
340 (let ((result (make-element)))
341 (consume-and-skip-to-native source)
342 (if name
343 (setf (pattern-name result) (destructure-name source name))
344 (setf (pattern-name result) (p/name-class source)))
345 (skip-to-native source)
346 (setf (pattern-child result) (groupify (p/pattern+ source)))
347 result)))
349 (defvar *attribute-namespace-p* nil)
351 (defun p/attribute (source name)
352 (klacks:expecting-element (source "attribute")
353 (let ((result (make-attribute)))
354 (consume-and-skip-to-native source)
355 (if name
356 (setf (pattern-name result)
357 (let ((*namespace-uri* (or *ns* "")))
358 (destructure-name source name)))
359 (setf (pattern-name result)
360 (let ((*attribute-namespace-p* t))
361 (p/name-class source))))
362 (skip-to-native source)
363 (setf (pattern-child result)
364 (or (p/pattern? source) (make-text)))
365 result)))
367 (defun p/combination (zipper source)
368 (klacks:expecting-element (source)
369 (consume-and-skip-to-native source)
370 (funcall zipper (p/pattern+ source))))
372 (defun p/one-or-more (source)
373 (klacks:expecting-element (source "oneOrMore")
374 (consume-and-skip-to-native source)
375 (let ((children (p/pattern+ source)))
376 (make-one-or-more (groupify children)))))
378 (defun p/zero-or-more (source)
379 (klacks:expecting-element (source "zeroOrMore")
380 (consume-and-skip-to-native source)
381 (let ((children (p/pattern+ source)))
382 (make-choice (make-one-or-more (groupify children))
383 (make-empty)))))
385 (defun p/optional (source)
386 (klacks:expecting-element (source "optional")
387 (consume-and-skip-to-native source)
388 (let ((children (p/pattern+ source)))
389 (make-choice (groupify children) (make-empty)))))
391 (defun p/list (source)
392 (klacks:expecting-element (source "list")
393 (consume-and-skip-to-native source)
394 (let ((children (p/pattern+ source)))
395 (make-list-pattern (groupify children)))))
397 (defun p/mixed (source)
398 (klacks:expecting-element (source "mixed")
399 (consume-and-skip-to-native source)
400 (let ((children (p/pattern+ source)))
401 (make-interleave (groupify children) (make-text)))))
403 (defun p/ref (source)
404 (klacks:expecting-element (source "ref")
405 (prog1
406 (let* ((name (ntc "name" source))
407 (pdefinition
408 (or (find-definition name)
409 (setf (find-definition name)
410 (make-definition :name name :child nil)))))
411 (make-ref pdefinition))
412 (skip-foreign* source))))
414 (defun p/parent-ref (source)
415 (klacks:expecting-element (source "parentRef")
416 (prog1
417 (let* ((name (ntc "name" source))
418 (grammar (grammar-parent *grammar*))
419 (pdefinition
420 (or (find-definition name grammar)
421 (setf (find-definition name grammar)
422 (make-definition :name name :child nil)))))
423 (make-ref pdefinition))
424 (skip-foreign* source))))
426 (defun p/empty (source)
427 (klacks:expecting-element (source "empty")
428 (skip-foreign* source)
429 (make-empty)))
431 (defun p/text (source)
432 (klacks:expecting-element (source "text")
433 (skip-foreign* source)
434 (make-text)))
436 (defun consume-and-parse-characters (source)
437 ;; fixme
438 (let ((tmp ""))
439 (loop
440 (multiple-value-bind (key data) (klacks:peek-next source)
441 (case key
442 (:characters
443 (setf tmp (concatenate 'string tmp data)))
444 (:end-element (return)))))
445 tmp))
447 (defun p/value (source)
448 (klacks:expecting-element (source "value")
449 (let* ((type (ntc "type" source))
450 (string (consume-and-parse-characters source))
451 (ns *namespace-uri*)
452 (dl *datatype-library*))
453 (unless type
454 (setf type "token")
455 (setf dl ""))
456 (let ((ti
457 (cxml-types:find-type (and dl (find-symbol dl :keyword)) type)))
458 (unless ti
459 (rng-error source "type not found: ~A/~A" type dl))
460 (make-value :string string :type ti :ns ns)))))
462 (defun p/data (source)
463 (klacks:expecting-element (source "data")
464 (let* ((type (ntc "type" source))
465 (params '())
466 (except nil))
467 (loop
468 (multiple-value-bind (key uri lname)
469 (klacks:peek-next source)
471 (case key
472 (:start-element
473 (case (find-symbol lname :keyword)
474 (:|param| (push (p/param source) params))
475 (:|except|
476 (setf except (p/except-pattern source))
477 (skip-to-native source)
478 (return))
479 (t (skip-foreign source))))
480 (:end-element
481 (return)))))
482 (setf params (nreverse params))
483 (let* ((dl *datatype-library*)
484 (ti (apply #'cxml-types:find-type
485 (and dl (find-symbol dl :keyword))
486 type
487 (loop
488 for p in params
489 collect (find-symbol (string-invertcase
490 (param-name p))
491 :keyword)
492 collect (param-string p)))))
493 (unless ti
494 (rng-error source "type not found: ~A/~A" type dl))
495 (make-data
496 :type ti
497 :params params
498 :except except)))))
500 (defun string-invertcase (str)
501 (loop
502 with result = (copy-seq str)
503 for c across str
504 for i from 0
506 (setf (char result i)
507 (if (lower-case-p c)
508 (char-upcase c)
509 (char-downcase c)))
510 finally (return result)))
512 (defun p/param (source)
513 (klacks:expecting-element (source "param")
514 (let ((name (ntc "name" source))
515 (string (consume-and-parse-characters source)))
516 (make-param :name name :string string))))
518 (defun p/except-pattern (source)
519 (klacks:expecting-element (source "except")
520 (with-library-and-ns (klacks:list-attributes source)
521 (klacks:consume source)
522 (choice-ify (p/pattern+ source)))))
524 (defun p/not-allowed (source)
525 (klacks:expecting-element (source "notAllowed")
526 (consume-and-skip-to-native source)
527 (make-not-allowed)))
529 (defun safe-parse-uri (source str &optional base)
530 (when (zerop (length str))
531 (rng-error source "missing URI"))
532 (handler-case
533 (if base
534 (puri:merge-uris str base)
535 (puri:parse-uri str))
536 (puri:uri-parse-error ()
537 (rng-error source "invalid URI: ~A" str))))
539 (defun p/external-ref (source)
540 (klacks:expecting-element (source "externalRef")
541 (let* ((href
542 (escape-uri (attribute "href" (klacks:list-attributes source))))
543 (base (klacks:current-xml-base source))
544 (uri (safe-parse-uri source href base)))
545 (when (find uri *include-uri-stack* :test #'puri:uri=)
546 (rng-error source "looping include"))
547 (prog1
548 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
549 (xstream
550 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
551 (klacks:with-open-source (source (cxml:make-source xstream))
552 (invoke-with-klacks-handler
553 (lambda ()
554 (klacks:find-event source :start-element)
555 (let ((*datatype-library* ""))
556 (p/pattern source)))
557 source)))
558 (skip-foreign* source)))))
560 (defun p/grammar (source &optional grammar)
561 (klacks:expecting-element (source "grammar")
562 (consume-and-skip-to-native source)
563 (let ((*grammar* (or grammar (make-grammar *grammar*)))
564 (includep grammar))
565 (process-grammar-content* source)
566 (unless (or includep (grammar-start *grammar*))
567 (rng-error source "no <start> in grammar"))
568 (check-pattern-definitions source *grammar*)
569 (unless includep
570 (defn-child (grammar-start *grammar*))))))
572 (defvar *include-start*)
573 (defvar *include-definitions*)
575 (defun process-grammar-content* (source &key disallow-include)
576 (loop
577 (multiple-value-bind (key uri lname) (klacks:peek source)
579 (case key
580 (:start-element
581 (with-library-and-ns (klacks:list-attributes source)
582 (case (find-symbol lname :keyword)
583 (:|start| (process-start source))
584 (:|define| (process-define source))
585 (:|div| (process-div source))
586 (:|include|
587 (when disallow-include
588 (rng-error source "nested include not permitted"))
589 (process-include source))
591 (skip-foreign source)))))
592 (:end-element
593 (return))))
594 (klacks:consume source)))
596 (defun process-start (source)
597 (klacks:expecting-element (source "start")
598 (let* ((combine0 (ntc "combine" source))
599 (combine
600 (when combine0
601 (find-symbol (string-upcase combine0) :keyword)))
602 (child
603 (progn
604 (consume-and-skip-to-native source)
605 (p/pattern source)))
606 (pdefinition (grammar-start *grammar*)))
607 (skip-foreign* source)
608 ;; fixme: shared code with process-define
609 (unless pdefinition
610 (setf pdefinition (make-definition :name :start :child nil))
611 (setf (grammar-start *grammar*) pdefinition))
612 (when *include-body-p*
613 (setf *include-start* pdefinition))
614 (cond
615 ((defn-child pdefinition)
616 (ecase (defn-redefinition pdefinition)
617 (:not-being-redefined
618 (when (and combine
619 (defn-combine-method pdefinition)
620 (not (eq combine
621 (defn-combine-method pdefinition))))
622 (rng-error source "conflicting combine values for <start>"))
623 (unless combine
624 (when (defn-head-p pdefinition)
625 (rng-error source "multiple definitions for <start>"))
626 (setf (defn-head-p pdefinition) t))
627 (unless (defn-combine-method pdefinition)
628 (setf (defn-combine-method pdefinition) combine))
629 (setf (defn-child pdefinition)
630 (case (defn-combine-method pdefinition)
631 (:choice
632 (make-choice (defn-child pdefinition) child))
633 (:interleave
634 (make-interleave (defn-child pdefinition) child)))))
635 (:being-redefined-and-no-original
636 (setf (defn-redefinition pdefinition)
637 :being-redefined-and-original))
638 (:being-redefined-and-original)))
640 (setf (defn-child pdefinition) child)
641 (setf (defn-combine-method pdefinition) combine)
642 (setf (defn-head-p pdefinition) (null combine))
643 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
645 (defun zip (constructor children)
646 (cond
647 ((null children)
648 (rng-error nil "empty choice?"))
649 ((null (cdr children))
650 (car children))
652 (destructuring-bind (a b &rest rest)
653 children
654 (zip constructor (cons (funcall constructor a b) rest))))))
656 (defun choice-ify (children) (zip #'make-choice children))
657 (defun groupify (children) (zip #'make-group children))
658 (defun interleave-ify (children) (zip #'make-interleave children))
660 (defun find-definition (name &optional (grammar *grammar*))
661 (gethash name (grammar-definitions grammar)))
663 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
664 (setf (gethash name (grammar-definitions grammar)) newval))
666 (defun process-define (source)
667 (klacks:expecting-element (source "define")
668 (let* ((name (ntc "name" source))
669 (combine0 (ntc "combine" source))
670 (combine (when combine0
671 (find-symbol (string-upcase combine0) :keyword)))
672 (child (groupify
673 (progn
674 (consume-and-skip-to-native source)
675 (p/pattern+ source))))
676 (pdefinition (find-definition name)))
677 (unless pdefinition
678 (setf pdefinition (make-definition :name name :child nil))
679 (setf (find-definition name) pdefinition))
680 (when *include-body-p*
681 (push pdefinition *include-definitions*))
682 (cond
683 ((defn-child pdefinition)
684 (case (defn-redefinition pdefinition)
685 (:not-being-redefined
686 (when (and combine
687 (defn-combine-method pdefinition)
688 (not (eq combine
689 (defn-combine-method pdefinition))))
690 (rng-error source "conflicting combine values for ~A" name))
691 (unless combine
692 (when (defn-head-p pdefinition)
693 (rng-error source "multiple definitions for ~A" name))
694 (setf (defn-head-p pdefinition) t))
695 (unless (defn-combine-method pdefinition)
696 (setf (defn-combine-method pdefinition) combine))
697 (setf (defn-child pdefinition)
698 (case (defn-combine-method pdefinition)
699 (:choice
700 (make-choice (defn-child pdefinition) child))
701 (:interleave
702 (make-interleave (defn-child pdefinition) child)))))
703 (:being-redefined-and-no-original
704 (setf (defn-redefinition pdefinition)
705 :being-redefined-and-original))
706 (:being-redefined-and-original)))
708 (setf (defn-child pdefinition) child)
709 (setf (defn-combine-method pdefinition) combine)
710 (setf (defn-head-p pdefinition) (null combine))
711 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
713 (defun process-div (source)
714 (klacks:expecting-element (source "div")
715 (consume-and-skip-to-native source)
716 (process-grammar-content* source)))
718 (defun reset-definition-for-include (defn)
719 (setf (defn-combine-method defn) nil)
720 (setf (defn-redefinition defn) :being-redefined-and-no-original)
721 (setf (defn-head-p defn) nil))
723 (defun restore-definition (defn original)
724 (setf (defn-combine-method defn) (defn-combine-method original))
725 (setf (defn-redefinition defn) (defn-redefinition original))
726 (setf (defn-head-p defn) (defn-head-p original)))
728 (defun process-include (source)
729 (klacks:expecting-element (source "include")
730 (let* ((href
731 (escape-uri (attribute "href" (klacks:list-attributes source))))
732 (base (klacks:current-xml-base source))
733 (uri (safe-parse-uri source href base))
734 (*include-start* nil)
735 (*include-definitions* '()))
736 (consume-and-skip-to-native source)
737 (let ((*include-body-p* t))
738 (process-grammar-content* source :disallow-include t))
739 (let ((tmp-start
740 (when *include-start*
741 (prog1
742 (copy-structure *include-start*)
743 (reset-definition-for-include *include-start*))))
744 (tmp-defns
745 (loop
746 for defn in *include-definitions*
747 collect
748 (prog1
749 (copy-structure defn)
750 (reset-definition-for-include defn)))))
751 (when (find uri *include-uri-stack* :test #'puri:uri=)
752 (rng-error source "looping include"))
753 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
754 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri)))
755 (klacks:with-open-source (source (cxml:make-source xstream))
756 (invoke-with-klacks-handler
757 (lambda ()
758 (klacks:find-event source :start-element)
759 (let ((*datatype-library* ""))
760 (p/grammar source *grammar*)))
761 source))
762 (check-pattern-definitions source *grammar*)
763 (when tmp-start
764 (restore-definition *include-start* tmp-start))
765 (dolist (copy tmp-defns)
766 (let ((defn (gethash (defn-name copy)
767 (grammar-definitions *grammar*))))
768 (restore-definition defn copy)))
769 nil)))))
771 (defun check-pattern-definitions (source grammar)
772 (when (and (grammar-start grammar)
773 (eq (defn-redefinition (grammar-start grammar))
774 :being-redefined-and-no-original))
775 (rng-error source "start not found in redefinition of grammar"))
776 (loop for defn being each hash-value in (grammar-definitions grammar) do
777 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
778 (rng-error source "redefinition not found in grammar"))
779 (unless (defn-child defn)
780 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
782 (defvar *any-name-allowed-p* t)
783 (defvar *ns-name-allowed-p* t)
785 (defun destructure-name (source qname)
786 (multiple-value-bind (uri lname)
787 (klacks:decode-qname qname source)
788 (setf uri (or uri *namespace-uri*))
789 (when (and *attribute-namespace-p*
790 (or (and (equal lname "xmlns") (equal uri ""))
791 (equal uri "http://www.w3.org/2000/xmlns")))
792 (rng-error source "namespace attribute not permitted"))
793 (make-name uri lname)))
795 (defun p/name-class (source)
796 (klacks:expecting-element (source)
797 (with-library-and-ns (klacks:list-attributes source)
798 (case (find-symbol (klacks:current-lname source) :keyword)
799 (:|name|
800 (let ((qname (string-trim *whitespace*
801 (consume-and-parse-characters source))))
802 (destructure-name source qname)))
803 (:|anyName|
804 (unless *any-name-allowed-p*
805 (rng-error source "anyname now permitted in except"))
806 (klacks:consume source)
807 (prog1
808 (let ((*any-name-allowed-p* nil))
809 (make-any-name (p/except-name-class? source)))
810 (skip-to-native source)))
811 (:|nsName|
812 (unless *ns-name-allowed-p*
813 (rng-error source "nsname now permitted in except"))
814 (let ((uri *namespace-uri*)
815 (*any-name-allowed-p* nil)
816 (*ns-name-allowed-p* nil))
817 (when (and *attribute-namespace-p*
818 (equal uri "http://www.w3.org/2000/xmlns"))
819 (rng-error source "namespace attribute not permitted"))
820 (klacks:consume source)
821 (prog1
822 (make-ns-name uri (p/except-name-class? source))
823 (skip-to-native source))))
824 (:|choice|
825 (klacks:consume source)
826 (simplify-nc-choice (p/name-class* source)))
828 (rng-error source "invalid child in except"))))))
830 (defun p/name-class* (source)
831 (let ((results nil))
832 (loop
833 (skip-to-native source)
834 (case (klacks:peek source)
835 (:start-element (push (p/name-class source) results))
836 (:end-element (return)))
837 (klacks:consume source))
838 (nreverse results)))
840 (defun p/except-name-class? (source)
841 (skip-to-native source)
842 (multiple-value-bind (key uri lname)
843 (klacks:peek source)
845 (if (and (eq key :start-element)
846 (string= (find-symbol lname :keyword) "except"))
847 (p/except-name-class source)
848 nil)))
850 (defun p/except-name-class (source)
851 (klacks:expecting-element (source "except")
852 (with-library-and-ns (klacks:list-attributes source)
853 (klacks:consume source)
854 (let ((x (p/name-class* source)))
855 (if (cdr x)
856 (simplify-nc-choice x)
857 (car x))))))
859 (defun escape-uri (string)
860 (with-output-to-string (out)
861 (loop for c across (cxml::rod-to-utf8-string string) do
862 (let ((code (char-code c)))
863 ;; http://www.w3.org/TR/xlink/#link-locators
864 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
865 (format out "%~2,'0X" code)
866 (write-char c out))))))
869 ;;;; unparsing
871 (defvar *definitions-to-names*)
872 (defvar *seen-names*)
874 (defun serialization-name (defn)
875 (or (gethash defn *definitions-to-names*)
876 (setf (gethash defn *definitions-to-names*)
877 (let ((name (if (gethash (defn-name defn) *seen-names*)
878 (format nil "~A-~D"
879 (defn-name defn)
880 (hash-table-count *seen-names*))
881 (defn-name defn))))
882 (setf (gethash name *seen-names*) defn)
883 name))))
885 (defun serialize-grammar (grammar sink)
886 (cxml:with-xml-output sink
887 (let ((*definitions-to-names* (make-hash-table))
888 (*seen-names* (make-hash-table :test 'equal)))
889 (cxml:with-element "grammar"
890 (cxml:with-element "start"
891 (serialize-pattern (parsed-grammar-pattern grammar)))
892 (loop for defn being each hash-key in *definitions-to-names* do
893 (serialize-definition defn))))))
895 (defun serialize-pattern (pattern)
896 (etypecase pattern
897 (element
898 (cxml:with-element "element"
899 (serialize-name (pattern-name pattern))
900 (serialize-pattern (pattern-child pattern))))
901 (attribute
902 (cxml:with-element "attribute"
903 (serialize-name (pattern-name pattern))
904 (serialize-pattern (pattern-child pattern))))
905 (%combination
906 (cxml:with-element
907 (etypecase pattern
908 (group "group")
909 (interleave "interleave")
910 (choice "choice"))
911 (serialize-pattern (pattern-a pattern))
912 (serialize-pattern (pattern-b pattern))))
913 (one-or-more
914 (cxml:with-element "oneOrmore"
915 (serialize-pattern (pattern-child pattern))))
916 (list-pattern
917 (cxml:with-element "list"
918 (serialize-pattern (pattern-child pattern))))
919 (ref
920 (cxml:with-element "ref"
921 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
922 (empty
923 (cxml:with-element "empty"))
924 (not-allowed
925 (cxml:with-element "notAllowed"))
926 (text
927 (cxml:with-element "text"))
928 (value
929 (cxml:with-element "value"
930 (let ((type (pattern-type pattern)))
931 (cxml:attribute "datatype-library"
932 (symbol-name (cxml-types:type-library type)))
933 (cxml:attribute "type" (cxml-types:type-name type)))
934 (cxml:attribute "ns" (pattern-ns pattern))
935 (cxml:text (pattern-string pattern))))
936 (data
937 (cxml:with-element "value"
938 (let ((type (pattern-type pattern)))
939 (cxml:attribute "datatype-library"
940 (symbol-name (cxml-types:type-library type)))
941 (cxml:attribute "type" (cxml-types:type-name type)))
942 (dolist (param (pattern-params pattern))
943 (cxml:with-element "param"
944 (cxml:attribute "name" (param-name param))
945 (cxml:text (param-string param))))
946 (when (pattern-except pattern)
947 (cxml:with-element "except"
948 (serialize-pattern (pattern-except pattern))))))))
950 (defun serialize-definition (defn)
951 (cxml:with-element "define"
952 (cxml:attribute "name" (serialization-name defn))
953 (serialize-pattern (defn-child defn))))
955 (defun serialize-name (name)
956 (etypecase name
957 (name
958 (cxml:with-element "name"
959 (cxml:attribute "ns" (name-uri name))
960 (cxml:text (name-lname name))))
961 (any-name
962 (cxml:with-element "anyName"
963 (when (any-name-except name)
964 (serialize-except-name (any-name-except name)))))
965 (ns-name
966 (cxml:with-element "anyName"
967 (cxml:attribute "ns" (ns-name-uri name))
968 (when (ns-name-except name)
969 (serialize-except-name (ns-name-except name)))))
970 (name-class-choice
971 (cxml:with-element "choice"
972 (serialize-name (name-class-choice-a name))
973 (serialize-name (name-class-choice-b name))))))
975 (defun serialize-except-name (spec)
976 (cxml:with-element "except"
977 (serialize-name (cdr spec))))
980 ;;;; simplification
982 ;;; 4.1 Annotations
983 ;;; Foreign attributes and elements are removed implicitly while parsing.
985 ;;; 4.2 Whitespace
986 ;;; All character data is discarded while parsing (which can only be
987 ;;; whitespace after validation).
989 ;;; Whitespace in name, type, and combine attributes is stripped while
990 ;;; parsing. Ditto for <name/>.
992 ;;; 4.3. datatypeLibrary attribute
993 ;;; Escaping is done by p/pattern.
994 ;;; Attribute value defaulting is done using *datatype-library*; only
995 ;;; p/data and p/value record the computed value.
997 ;;; 4.4. type attribute of value element
998 ;;; Done by p/value.
1000 ;;; 4.5. href attribute
1001 ;;; Escaping is done by process-include and p/external-ref.
1003 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
1004 ;;; but that requires xstream hacking.
1006 ;;; 4.6. externalRef element
1007 ;;; Done by p/external-ref.
1009 ;;; 4.7. include element
1010 ;;; Done by process-include.
1012 ;;; 4.8. name attribute of element and attribute elements
1013 ;;; `name' is stored as a slot, not a child. Done by p/element and
1014 ;;; p/attribute.
1016 ;;; 4.9. ns attribute
1017 ;;; done by p/name-class, p/value, p/element, p/attribute
1019 ;;; 4.10. QNames
1020 ;;; done by p/name-class
1022 ;;; 4.11. div element
1023 ;;; Legen wir gar nicht erst an.
1025 ;;; 4.12. 4.13 4.14 4.15
1026 ;;; beim anlegen
1028 ;;; 4.16
1029 ;;; p/name-class
1030 ;;; -- ausser der sache mit den datentypen
1032 ;;; 4.17, 4.18, 4.19
1033 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
1034 ;;; beschrieben.
1036 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
1037 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
1038 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
1039 ;;; dafuer beim Serialisieren um.
1041 (defmethod check-recursion ((pattern element) depth)
1042 (check-recursion (pattern-child pattern) (1+ depth)))
1044 (defmethod check-recursion ((pattern ref) depth)
1045 (when (eql (pattern-crdepth pattern) depth)
1046 (rng-error nil "infinite recursion in ~A"
1047 (defn-name (pattern-target pattern))))
1048 (when (null (pattern-crdepth pattern))
1049 (setf (pattern-crdepth pattern) depth)
1050 (check-recursion (defn-child (pattern-target pattern)) depth)
1051 (setf (pattern-crdepth pattern) t)))
1053 (defmethod check-recursion ((pattern %parent) depth)
1054 (check-recursion (pattern-child pattern) depth))
1056 (defmethod check-recursion ((pattern %combination) depth)
1057 (check-recursion (pattern-a pattern) depth)
1058 (check-recursion (pattern-b pattern) depth))
1060 (defmethod check-recursion ((pattern %leaf) depth)
1061 (declare (ignore depth)))
1063 (defmethod check-recursion ((pattern data) depth)
1064 (when (pattern-except pattern)
1065 (check-recursion (pattern-except pattern) depth)))
1068 ;;;; 4.20
1070 ;;; %PARENT
1072 (defmethod fold-not-allowed ((pattern element))
1073 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1074 pattern)
1076 (defmethod fold-not-allowed ((pattern %parent))
1077 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1078 (if (typep (pattern-child pattern) 'not-allowed)
1079 (pattern-child pattern)
1080 pattern))
1082 ;;; %COMBINATION
1084 (defmethod fold-not-allowed ((pattern %combination))
1085 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
1086 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
1087 pattern)
1089 (defmethod fold-not-allowed ((pattern group))
1090 (call-next-method)
1091 (cond
1092 ;; remove if any child is not allowed
1093 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1094 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1095 (t pattern)))
1097 (defmethod fold-not-allowed ((pattern interleave))
1098 (call-next-method)
1099 (cond
1100 ;; remove if any child is not allowed
1101 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1102 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1103 (t pattern)))
1105 (defmethod fold-not-allowed ((pattern choice))
1106 (call-next-method)
1107 (cond
1108 ;; if any child is not allowed, choose the other
1109 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1110 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1111 (t pattern)))
1113 ;;; LEAF
1115 (defmethod fold-not-allowed ((pattern %leaf))
1116 pattern)
1118 (defmethod fold-not-allowed ((pattern data))
1119 (when (pattern-except pattern)
1120 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1121 (when (typep (pattern-except pattern) 'not-allowed)
1122 (setf (pattern-except pattern) nil)))
1123 pattern)
1125 ;;; REF
1127 (defmethod fold-not-allowed ((pattern ref))
1128 pattern)
1131 ;;;; 4.21
1133 ;;; %PARENT
1135 (defmethod fold-empty ((pattern one-or-more))
1136 (call-next-method)
1137 (if (typep (pattern-child pattern) 'empty)
1138 (pattern-child pattern)
1139 pattern))
1141 (defmethod fold-empty ((pattern %parent))
1142 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1143 pattern)
1145 ;;; %COMBINATION
1147 (defmethod fold-empty ((pattern %combination))
1148 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1149 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1150 pattern)
1152 (defmethod fold-empty ((pattern group))
1153 (call-next-method)
1154 (cond
1155 ;; if any child is empty, choose the other
1156 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1157 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1158 (t pattern)))
1160 (defmethod fold-empty ((pattern interleave))
1161 (call-next-method)
1162 (cond
1163 ;; if any child is empty, choose the other
1164 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1165 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1166 (t pattern)))
1168 (defmethod fold-empty ((pattern choice))
1169 (call-next-method)
1170 (if (typep (pattern-b pattern) 'empty)
1171 (cond
1172 ((typep (pattern-a pattern) 'empty)
1173 (pattern-a pattern))
1175 (rotatef (pattern-a pattern) (pattern-b pattern))
1176 pattern))
1177 pattern))
1179 ;;; LEAF
1181 (defmethod fold-empty ((pattern %leaf))
1182 pattern)
1184 (defmethod fold-empty ((pattern data))
1185 (when (pattern-except pattern)
1186 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1187 pattern)
1189 ;;; REF
1191 (defmethod fold-empty ((pattern ref))
1192 pattern)