dtd-tests korrigiert
[cxml-rng.git] / parse.lisp
bloba0d82d9773b2538366a45a9a7f546daa2b438617
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8 ;;;
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11 ;;;
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 (in-package :cxml-rng)
31 #+sbcl
32 (declaim (optimize (debug 2)))
35 ;;;; Errors
37 (define-condition rng-error (simple-error)
38 ((line-number :initarg :line-number :accessor rng-error-line-number)
39 (column-number :initarg :column-number :accessor rng-error-column-number)
40 (system-id :initarg :system-id :accessor rng-error-system-id))
41 (:documentation
42 "The class of all validation errors.
43 @see-slot{rng-error-line-number}
44 @see-slot{rng-error-column-number}
45 @see-slot{rng-error-system-id}"))
47 (define-condition dtd-compatibility-error (rng-error)
48 ())
50 (setf (documentation 'rng-error-line-number 'function)
51 "@arg[instance]{an instance of @class{rng-error}}
52 @return{an integer, or nil}
53 Return the line number reported by the parser when the Relax NG error
54 was detected, or NIL if not available.")
56 (setf (documentation 'rng-error-column-number 'function)
57 "@arg[instance]{an instance of @class{rng-error}}
58 @return{an integer, or nil}
59 Return the column number reported by the parser when the Relax NG error
60 was detected, or NIL if not available.")
62 (setf (documentation 'rng-error-system-id 'function)
63 "@arg[instance]{an instance of @class{rng-error}}
64 @return{a puri:uri, or nil}
65 Return the System ID of the document being parsed when the Relax NG
66 error was detected, or NIL if not available.")
68 (defun rng-error (source fmt &rest args)
69 "@unexport{}"
70 (let ((s (make-string-output-stream)))
71 (apply #'format s fmt args)
72 (multiple-value-bind (line-number column-number system-id)
73 (etypecase source
74 (null)
75 (klacks:source
76 (values (klacks:current-line-number source)
77 (klacks:current-column-number source)
78 (klacks:current-system-id source)))
79 (sax:sax-parser-mixin
80 (values (sax:line-number source)
81 (sax:column-number source)
82 (sax:system-id source))))
83 (when (or line-number column-number system-id)
84 (format s "~& [ Error at line ~D, column ~D in ~S ]"
85 line-number
86 column-number
87 system-id))
88 (error 'rng-error
89 :format-control "~A"
90 :format-arguments (list (get-output-stream-string s))
91 :line-number line-number
92 :column-number column-number
93 :system-id system-id))))
96 ;;;; Parser
98 (defvar *datatype-library*)
99 (defvar *namespace-uri*)
100 (defvar *ns*)
101 (defvar *entity-resolver*)
102 (defvar *external-href-stack*)
103 (defvar *include-uri-stack*)
104 (defvar *include-body-p* nil)
105 (defvar *grammar*)
107 (defvar *debug* nil)
109 (defstruct (schema
110 (:constructor make-schema (start definitions)))
111 "An instance of this class represents a Relax NG grammar that has
112 been parsed and simplified.
113 @see-slot{schema-start}
114 @see-constructor{parse-schema}
115 @see{make-validator}
116 @see{serialize-schema} "
117 (start (missing) :type pattern)
118 (definitions (missing) :type list)
119 (interned-start nil :type (or null pattern))
120 (registratur nil :type (or null hash-table)))
122 (setf (documentation 'schema-start 'function)
123 "@arg[instance]{an instance of @class{schema}}
124 @return{the start pattern, an instance of @class{pattern}}
125 Reader function for the grammar's start pattern, from which all
126 of the grammar's patters are reachable.")
128 (defmethod print-object ((object schema) stream)
129 (print-unreadable-object (object stream :type t :identity t)))
131 (defun invoke-with-klacks-handler (fn source)
132 (if *debug*
133 (funcall fn)
134 (handler-case
135 (funcall fn)
136 (cxml:xml-parse-error (c)
137 (rng-error source "Cannot parse schema: ~A" c)))))
139 (defvar *validate-grammar* t)
140 (defparameter *relax-ng-grammar* nil)
141 (defun flush () (setf *relax-ng-grammar* nil))
143 (defun make-validating-source (input schema)
144 "@arg[input]{a @code{source} or a stream designator}
145 @arg[schema]{the parsed Relax NG @class{schema} object}
146 @return{a klacks source}
147 @short{This function creates a klacks source for @code{input} that validates
148 events against @code{schema}.}
150 Input can be a klacks source or any argument applicable to
151 @code{cxml:make-source}.
153 @see{parse-schema}
154 @see{make-validator}"
155 (klacks:make-tapping-source (if (typep input 'klacks:source)
156 input
157 (cxml:make-source input))
158 (make-validator schema)))
160 (defun make-schema-source (input)
161 (let ((upstream (cxml:make-source input)))
162 (if *validate-grammar*
163 (make-validating-source upstream *relax-ng-grammar*)
164 upstream)))
166 (defun parse-schema (input &key entity-resolver)
167 "@arg[input]{a string, pathname, stream, or xstream}
168 @arg[entity-resolver]{a function of two arguments, or NIL}
169 @return{a parsed @class{schema}}
170 @short{This function parses a Relax NG schema file in XML syntax}
171 and returns a parsed representation of that schema.
173 @code{input} can be any stream designator as understood by
174 @code{cxml:make-source}.
176 Note that namestrings are not valid arguments,
177 because they would be interpreted as XML source code. Use pathnames
178 instead.
180 @code{entity-resolver} can be passed as a function of two arguments.
181 It is invoked for every entity referenced by the
182 document with the entity's Public ID (a rod) and System ID (an
183 URI object) as arguments. The function may either return
184 nil, CXML will then try to resolve the entity as usual.
185 Alternatively it may return a Common Lisp stream specialized on
186 @code{(unsigned-byte 8)} which will be used instead.
188 @see{parse-compact}
189 @see{make-validator}"
190 (when *validate-grammar*
191 (unless *relax-ng-grammar*
192 (setf *relax-ng-grammar*
193 (let* ((*validate-grammar* nil)
194 (d (slot-value (asdf:find-system :cxml-rng)
195 'asdf::relative-pathname)))
196 (parse-schema (merge-pathnames "rng.rng" d))
197 #+(or)
198 (parse-compact (merge-pathnames "rng.rnc" d))))))
199 (klacks:with-open-source (source (make-schema-source input))
200 (invoke-with-klacks-handler
201 (lambda ()
202 (klacks:find-event source :start-element)
203 (let* ((*datatype-library* "")
204 (*namespace-uri* "")
205 (*entity-resolver* entity-resolver)
206 (*external-href-stack* '())
207 (*include-uri-stack* '())
208 (*grammar* (make-grammar nil))
209 (start (p/pattern source)))
210 (unless start
211 (rng-error nil "empty grammar"))
212 (setf (grammar-start *grammar*)
213 (make-definition :name :start :child start))
214 (check-pattern-definitions source *grammar*)
215 (check-recursion start 0)
216 (multiple-value-bind (new-start defns)
217 (finalize-definitions start)
218 (setf start (fold-not-allowed new-start))
219 (dolist (defn defns)
220 (setf (defn-child defn) (fold-not-allowed (defn-child defn))))
221 (setf start (fold-empty start))
222 (dolist (defn defns)
223 (setf (defn-child defn) (fold-empty (defn-child defn)))))
224 (multiple-value-bind (new-start defns)
225 (finalize-definitions start)
226 (check-start-restrictions new-start)
227 (dolist (defn defns)
228 (check-restrictions (defn-child defn)))
229 (make-schema new-start defns))))
230 source)))
233 ;;;; pattern structures
235 (defstruct pattern
236 "@short{The superclass of all patterns.}
237 Instances of this class represent elements in the \"simplified syntax\"
238 of Relax NG.
240 Patterns are documented for introspective purposes and are not meant to
241 be modified by user code.
243 The start pattern of a schema is available through @fun{schema-start}.
245 @see{schema}"
246 (nullable :uninitialized))
248 (defmethod print-object :around ((object pattern) stream)
249 (if *debug*
250 (let ((*print-circle* t))
251 (call-next-method))
252 (print-unreadable-object (object stream :type t :identity t))))
254 (defstruct (%parent (:include pattern) (:conc-name "PATTERN-"))
255 child)
257 (defstruct (%named-pattern (:include %parent) (:conc-name "PATTERN-"))
258 name)
260 (setf (documentation 'pattern-name 'function)
261 "@arg[instance]{an instance of @class{pattern}}
262 @return{a @class{name-class}}
263 @short{Returns the @code{pattern}'s name class.}
265 This slot describes the name allowed for the current element or
266 attribute.
268 @see{element}
269 @see{attribute}")
271 (setf (documentation 'pattern-child 'function)
272 "@arg[instance]{an instance of @class{pattern}}
273 @return{an instance of @class{pattern}}
274 @short{Returns the pattern's sub-pattern.}
276 (Elements in the full Relax NG syntax allow more than one child
277 pattern, but simplification normalizes the representation so that
278 any such element has exactly one child.)
280 @see{element}
281 @see{attribute}
282 @see{one-or-more}
283 @see{list-pattern}
284 @see{choice}")
286 (defstruct (element (:include %named-pattern))
287 "@short{This pattern specifies that an element of a certain name class
288 is required.}
290 Its child pattern describes the attributes and child nodes
291 of this element.
292 @see-slot{pattern-name}
293 @see-slot{pattern-child}")
295 (defstruct (attribute (:include %named-pattern))
296 "@short{This pattern specifies that an attribute of a certain name class
297 is required.}
299 Its child pattern describes the type of the attribute's
300 contents.
301 @see-slot{pattern-name}
302 @see-slot{pattern-child}")
304 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
305 a b)
307 (setf (documentation 'pattern-a 'function)
308 "@arg[instance]{an instance of @class{pattern}}
309 @return{an instance of @class{pattern}}
310 @short{Returns the first of two sub-patterns the pattern instance has.}
312 (Elements in the full Relax NG syntax allow more than two child
313 patterns, but simplification normalizes the representation so that
314 any such element has exactly two children.)
316 @see{pattern-b}
317 @see{group}
318 @see{interleave}
319 @see{choice}")
321 (setf (documentation 'pattern-b 'function)
322 "@arg[instance]{an instance of @class{pattern}}
323 @return{an instance of @class{pattern}}
324 @short{Returns the second of two sub-patterns the pattern instance has.}
326 (Elements in the full Relax NG syntax allow more than two child
327 patterns, but simplification normalizes the representation so that
328 any such element has exactly two children.)
330 @see{pattern-a}
331 @see{group}
332 @see{interleave}
333 @see{choice}")
335 (defstruct (group
336 (:include %combination)
337 (:constructor make-group (a b)))
338 "@short{This pattern specifies that two subpatterns are
339 required at the current position in a specific order.}
341 @see-slot{pattern-a}
342 @see-slot{pattern-b}")
343 (defstruct (interleave
344 (:include %combination)
345 (:constructor make-interleave (a b)))
346 "@short{This pattern specifies that two possible subpatterns are
347 allowed to occur in any order at the current position.}
349 @see-slot{pattern-a}
350 @see-slot{pattern-b}")
351 (defstruct (choice
352 (:include %combination)
353 (:constructor make-choice (a b)))
354 "@short{This pattern specifies that one of two possible subpatterns are
355 allowed at the current position, given as its children.}
357 @see-slot{pattern-a}
358 @see-slot{pattern-b}")
359 (defstruct (after
360 (:include %combination)
361 (:constructor make-after (a b))))
363 (defstruct (one-or-more
364 (:include %parent)
365 (:constructor make-one-or-more (child)))
366 "@short{This pattern specifies that its subpattern is
367 allowed to occur at the current position one or more times.}
369 @see-slot{pattern-child}")
370 (defstruct (list-pattern
371 (:include %parent)
372 (:constructor make-list-pattern (child)))
373 "@short{This pattern specifies that a subpatterns is allowed multiple
374 times a the current position, with whitespace as a separator.}
376 @see-slot{pattern-child}")
378 (defstruct (ref
379 (:include pattern)
380 (:conc-name "PATTERN-")
381 (:constructor make-ref (target)))
382 "@short{This pattern references another part of the pattern graph.}
384 @code{ref} is the only pattern to introduce shared structure and
385 circularity into the pattern graph, by referring to elements defined
386 elsewhere.
388 (@code{ref} patterns in the full Relax NG syntax can be used to refer
389 to any pattern definition in the grammar. Simplification normalizes
390 the schema so that ref patterns only refer to definitions which have
391 an @code{element} as their child.)
393 @see-slot{pattern-element}"
394 crdepth
395 target)
397 (defun pattern-element (ref)
398 "@arg[ref]{an instance of @class{ref}}
399 @return{an instance of @class{element}}
400 @short{Returns the ref pattern's target.}
402 @code{ref} is the only pattern to introduce shared structure and
403 circularity into the pattern graph, by referring to elements defined
404 elsewhere.
406 (@code{ref} patterns in the full Relax NG syntax can be used to refer
407 to any pattern definition in the grammar. Simplification normalizes
408 the schema so that ref patterns only refer to definitions which have
409 an @code{element} as their child.)"
410 (defn-child (pattern-target ref)))
412 (defstruct (%leaf (:include pattern)))
414 (defstruct (empty (:include %leaf))
415 "@short{This pattern specifies that nothing more is expected at the current
416 position.}")
418 (defstruct (text (:include %leaf))
419 "@short{This pattern specifies that text is expected here.}")
421 (defstruct (%typed-pattern (:include %leaf) (:conc-name "PATTERN-"))
422 type)
424 (setf (documentation 'pattern-type 'function)
425 "@arg[instance]{an instance of @class{pattern}}
426 @return{a @class{cxml-types:data-type}}
427 @short{Returns the data type expected at this position.}
429 This type has already been parsed into an object. Its name and
430 the URI of its library can be queried from that object.
432 @see{data}
433 @see{value}
434 @see{cxml-types:type-name}
435 @see{cxml-types:type-library}")
437 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
438 "@short{This pattern specifies that a specific value is expected as text
439 here.}
441 The value expected is @code{pattern-value}, parsed from
442 @code{pattern-string} using @code{pattern-type}.
444 @see-slot{pattern-type}
445 @see-slot{pattern-value}
446 @see-slot{pattern-string}"
448 string
449 value)
451 (setf (documentation 'pattern-string 'function)
452 "@arg[instance]{an instance of @class{value}}
453 @return{a string}
454 @short{Returns the string expected at this position.}
456 This string is the lexical representation expected, not parsed into
457 a value object yet. The parsed object is available as
458 @fun{pattern-value}.
460 @see{pattern-type}")
462 (setf (documentation 'pattern-value 'function)
463 "@arg[instance]{an instance of @class{value}}
464 @return{an object as returned by @fun{cxml-types:parse}}
465 @short{Returns the value expected at this position.}
467 This object is the result of parsing @fun{pattern-string} using
468 @fun{pattern-type}.")
470 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
471 "@short{This pattern specifies that text of a specific data type is
472 expected.}
474 The data type instance stored in the @code{pattern-type} slot takes into
475 account additional paramaters, which can be retrieved using
476 @code{pattern-params} in their original form.
478 @see-slot{pattern-type}
479 @see-slot{pattern-params}
480 @see-slot{pattern-except}"
481 params
482 except)
484 (setf (documentation 'pattern-except 'function)
485 "@arg[instance]{an instance of @class{data}}
486 @return{a @class{pattern}, or @code{nil}}
487 @short{Returns the @code{data} instance's @code{except} pattern.}
489 In addition to a data type, @code{data} can specify that certain
490 values are @em{not} permitted. They are described using a pattern.
492 If this slot is @code{nil}, no exception is defined.")
494 (setf (documentation 'pattern-params 'function)
495 "@arg[instance]{an instance of @class{data}}
496 @return{a list of @fun{cxml-types:param}}
497 @short{The data type parameters for this data pattern.}
499 (With the XSD type library, these are known as restricting facets.)")
501 (defstruct (not-allowed (:include %leaf))
502 "@short{This pattern specifies that the part of the schema reached at
503 this point is not valid.}")
506 ;;;; non-pattern
508 (defstruct (grammar (:constructor make-grammar (parent)))
509 (start nil)
510 parent
511 (definitions (make-hash-table :test 'equal)))
513 ;; Clark calls this structure "RefPattern"
514 (defstruct (definition (:conc-name "DEFN-"))
515 name
516 combine-method
517 head-p
518 redefinition
519 child)
522 ;;; name-class
524 (defun missing ()
525 (error "missing arg"))
527 (defstruct name-class
528 "@short{The abstract superclass of all name-related classes.}
530 Name classes represent sets of permissible names for an element or
531 attribute.
533 Names are pairs of namespace URI and local-name.
535 @see{attribute}
536 @see{element}")
538 (defstruct (any-name (:include name-class)
539 (:constructor make-any-name (except)))
540 "@short{This name class allows any name.}
542 Exceptions are given as @code{any-name-except}.
544 @see-slot{any-name-except}"
545 (except (missing) :type (or null name-class)))
547 (setf (documentation 'any-name-except 'function)
548 "@arg[instance]{an instance of @class{any-name}}
549 @return{a @class{name-class} or @code{nil}}
551 Return the name class @em{not} allowed by this @code{any-name},
552 or @code{nil} if there is no such exception.")
554 (defstruct (name (:include name-class)
555 (:constructor make-name (uri lname)))
556 "@short{This name class allows only a specific name.}
558 A specific namespace URI and local name are expected.
560 @see-slot{name-uri}
561 @see-slot{name-lname}"
562 (uri (missing) :type string)
563 (lname (missing) :type string))
565 (setf (documentation 'name-uri 'function)
566 "@arg[instance]{an instance of @class{name}}
567 @return{a string}
568 Return the expected namespace URI.")
570 (setf (documentation 'name-lname 'function)
571 "@arg[instance]{an instance of @class{name}}
572 @return{a string}
573 Return the expected local name.")
575 (defstruct (ns-name (:include name-class)
576 (:constructor make-ns-name (uri except)))
577 "@short{This name class allows all names in a specific namespace}, with
578 possible exceptions.
580 A specific namespace URI is expected.
582 Exceptions are given as @code{ns-name-except}.
584 @see-slot{ns-name-uri}
585 @see-slot{ns-name-except}"
586 (uri (missing) :type string)
587 (except (missing) :type (or null name-class)))
589 (setf (documentation 'ns-name-uri 'function)
590 "@arg[instance]{an instance of @class{ns-name}}
591 @return{a string}
592 Return the expected namespace URI.")
594 (setf (documentation 'ns-name-except 'function)
595 "@arg[instance]{an instance of @class{ns-name}}
596 @return{a @class{name-class} or @code{nil}}
598 Return the name class @em{not} allowed by this @code{ns-name},
599 or @code{nil} if there is no such exception.")
601 (defstruct (name-class-choice (:include name-class)
602 (:constructor make-name-class-choice (a b)))
603 "@short{This name class represents the union of two other name classes.}
605 @see-slot{name-class-choice-a}
606 @see-slot{name-class-choice-b}"
607 (a (missing) :type name-class)
608 (b (missing) :type name-class))
610 (setf (documentation 'name-class-choice-a 'function)
611 "@arg[instance]{an instance of @class{name-class-choice}}
612 @return{a @class{name-class}}
613 Returns the 'first' of two name classes that are allowed.
614 @see{name-class-choice-b}")
616 (setf (documentation 'name-class-choice-b 'function)
617 "@arg[instance]{an instance of @class{name-class-choice}}
618 @return{a @class{name-class}}
619 Returns the 'second' of two name classes that are allowed.
620 @see{name-class-choice-a}")
622 (defun simplify-nc-choice (values)
623 (zip #'make-name-class-choice values))
626 ;;;; parser
628 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
630 (defun skip-foreign* (source)
631 (loop
632 (case (klacks:peek-next source)
633 (:start-element (skip-foreign source))
634 (:end-element (return)))))
636 (defun skip-to-native (source)
637 (loop
638 (case (klacks:peek source)
639 (:start-element
640 (when (equal (klacks:current-uri source) *rng-namespace*)
641 (return))
642 (klacks:serialize-element source nil))
643 (:end-element (return)))
644 (klacks:consume source)))
646 (defun consume-and-skip-to-native (source)
647 (klacks:consume source)
648 (skip-to-native source))
650 (defun skip-foreign (source)
651 (when (equal (klacks:current-uri source) *rng-namespace*)
652 (rng-error source
653 "invalid schema: ~A not allowed here"
654 (klacks:current-lname source)))
655 (klacks:serialize-element source nil))
657 (defun attribute (lname attrs)
658 "@unexport{}"
659 (let ((a (sax:find-attribute-ns "" lname attrs)))
660 (if a
661 (sax:attribute-value a)
662 nil)))
664 (defparameter *whitespace*
665 (format nil "~C~C~C~C"
666 (code-char 9)
667 (code-char 32)
668 (code-char 13)
669 (code-char 10)))
671 (defun ntc (lname source-or-attrs)
672 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
673 (let* ((attrs
674 (if (listp source-or-attrs)
675 source-or-attrs
676 (klacks:list-attributes source-or-attrs)))
677 (a (sax:find-attribute-ns "" lname attrs)))
678 (if a
679 (string-trim *whitespace* (sax:attribute-value a))
680 nil)))
682 (defmacro with-library-and-ns (attrs &body body)
683 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
685 (defun invoke-with-library-and-ns (fn attrs)
686 (let* ((dl (attribute "datatypeLibrary" attrs))
687 (ns (attribute "ns" attrs))
688 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
689 (*namespace-uri* (or ns *namespace-uri*))
690 (*ns* ns))
691 ;; FIXME: Ganz boese gehackt -- gerade so, dass wir die Relax NG
692 ;; Test-Suite bestehen.
693 (when (and dl
694 (not (zerop (length *datatype-library*)))
695 ;; scheme pruefen, und es muss was folgen
696 (or (not (cl-ppcre:all-matches
697 "^[a-zA-Z][a-zA-Z0-9+.-]*:.+"
698 *datatype-library*))
699 ;; keine kaputten %te, keine #
700 (cl-ppcre:all-matches
701 "(%$|%.$|%[^0-9A-Fa-f][^0-9A-Fa-f]|#)"
702 *datatype-library*)))
703 (rng-error nil "malformed datatypeLibrary: ~A" *datatype-library*))
704 (funcall fn)))
706 (defun p/pattern (source)
707 (let* ((lname (klacks:current-lname source))
708 (attrs (klacks:list-attributes source)))
709 (with-library-and-ns attrs
710 (case (find-symbol lname :keyword)
711 (:|element| (p/element source (ntc "name" attrs)))
712 (:|attribute| (p/attribute source (ntc "name" attrs)))
713 (:|group| (p/combination #'groupify source))
714 (:|interleave| (p/combination #'interleave-ify source))
715 (:|choice| (p/combination #'choice-ify source))
716 (:|optional| (p/optional source))
717 (:|zeroOrMore| (p/zero-or-more source))
718 (:|oneOrMore| (p/one-or-more source))
719 (:|list| (p/list source))
720 (:|mixed| (p/mixed source))
721 (:|ref| (p/ref source))
722 (:|parentRef| (p/parent-ref source))
723 (:|empty| (p/empty source))
724 (:|text| (p/text source))
725 (:|value| (p/value source))
726 (:|data| (p/data source))
727 (:|notAllowed| (p/not-allowed source))
728 (:|externalRef| (p/external-ref source))
729 (:|grammar| (p/grammar source))
730 (t (skip-foreign source))))))
732 (defun p/pattern+ (source)
733 (let ((children nil))
734 (loop
735 (case (klacks:peek source)
736 (:start-element
737 (let ((p (p/pattern source))) (when p (push p children))))
738 (:end-element
739 (return))
741 (klacks:consume source))))
742 (unless children
743 (rng-error source "empty element"))
744 (nreverse children)))
746 (defun p/pattern? (source)
747 (let ((result nil))
748 (loop
749 (skip-to-native source)
750 (case (klacks:peek source)
751 (:start-element
752 (when result
753 (rng-error source "at most one pattern expected here"))
754 (setf result (p/pattern source)))
755 (:end-element
756 (return))
758 (klacks:consume source))))
759 result))
761 (defun p/element (source name)
762 (klacks:expecting-element (source "element")
763 (let ((elt (make-element)))
764 (consume-and-skip-to-native source)
765 (if name
766 (setf (pattern-name elt) (destructure-name source name))
767 (setf (pattern-name elt) (p/name-class source)))
768 (skip-to-native source)
769 (setf (pattern-child elt) (groupify (p/pattern+ source)))
770 (make-ref (make-definition :name (gensym "ANONYMOUS") :child elt)))))
772 (defvar *attribute-namespace-p* nil)
774 (defun p/attribute (source name)
775 (klacks:expecting-element (source "attribute")
776 (let ((result (make-attribute)))
777 (consume-and-skip-to-native source)
778 (if name
779 (setf (pattern-name result)
780 (let ((*namespace-uri* (or *ns* ""))
781 (*attribute-namespace-p* t))
782 (destructure-name source name)))
783 (setf (pattern-name result)
784 (let ((*attribute-namespace-p* t))
785 (p/name-class source))))
786 (skip-to-native source)
787 (setf (pattern-child result)
788 (or (p/pattern? source) (make-text)))
789 result)))
791 (defun p/combination (zipper source)
792 (klacks:expecting-element (source)
793 (consume-and-skip-to-native source)
794 (funcall zipper (p/pattern+ source))))
796 (defun p/one-or-more (source)
797 (klacks:expecting-element (source "oneOrMore")
798 (consume-and-skip-to-native source)
799 (let ((children (p/pattern+ source)))
800 (make-one-or-more (groupify children)))))
802 (defun p/zero-or-more (source)
803 (klacks:expecting-element (source "zeroOrMore")
804 (consume-and-skip-to-native source)
805 (let ((children (p/pattern+ source)))
806 (make-choice (make-one-or-more (groupify children))
807 (make-empty)))))
809 (defun p/optional (source)
810 (klacks:expecting-element (source "optional")
811 (consume-and-skip-to-native source)
812 (let ((children (p/pattern+ source)))
813 (make-choice (groupify children) (make-empty)))))
815 (defun p/list (source)
816 (klacks:expecting-element (source "list")
817 (consume-and-skip-to-native source)
818 (let ((children (p/pattern+ source)))
819 (make-list-pattern (groupify children)))))
821 (defun p/mixed (source)
822 (klacks:expecting-element (source "mixed")
823 (consume-and-skip-to-native source)
824 (let ((children (p/pattern+ source)))
825 (make-interleave (groupify children) (make-text)))))
827 (defun p/ref (source)
828 (klacks:expecting-element (source "ref")
829 (prog1
830 (let* ((name (ntc "name" source))
831 (pdefinition
832 (or (find-definition name)
833 (setf (find-definition name)
834 (make-definition :name name :child nil)))))
835 (make-ref pdefinition))
836 (skip-foreign* source))))
838 (defun p/parent-ref (source)
839 (klacks:expecting-element (source "parentRef")
840 (prog1
841 (let* ((name (ntc "name" source))
842 (grammar (grammar-parent *grammar*))
843 (pdefinition
844 (or (find-definition name grammar)
845 (setf (find-definition name grammar)
846 (make-definition :name name :child nil)))))
847 (make-ref pdefinition))
848 (skip-foreign* source))))
850 (defun p/empty (source)
851 (klacks:expecting-element (source "empty")
852 (skip-foreign* source)
853 (make-empty)))
855 (defun p/text (source)
856 (klacks:expecting-element (source "text")
857 (skip-foreign* source)
858 (make-text)))
860 (defun consume-and-parse-characters (source)
861 ;; fixme
862 (let ((tmp ""))
863 (loop
864 (multiple-value-bind (key data) (klacks:peek-next source)
865 (case key
866 (:characters
867 (setf tmp (concatenate 'string tmp data)))
868 (:end-element (return)))))
869 tmp))
871 (defun p/value (source)
872 (klacks:expecting-element (source "value")
873 (let* ((type (ntc "type" source))
874 (string (consume-and-parse-characters source))
875 (ns *namespace-uri*)
876 (dl *datatype-library*))
877 (unless type
878 (setf type "token")
879 (setf dl ""))
880 (let ((data-type
881 (cxml-types:find-type (and dl (find-symbol dl :keyword))
882 type
883 nil))
884 (vc (cxml-types:make-klacks-validation-context source)))
885 (unless data-type
886 (rng-error source "type not found: ~A/~A" type dl))
887 (make-value :string string
888 :value (cxml-types:parse data-type string vc)
889 :type data-type
890 :ns ns)))))
892 (defun p/data (source)
893 (klacks:expecting-element (source "data")
894 (let* ((type (ntc "type" source))
895 (params '())
896 (except nil))
897 (loop
898 (multiple-value-bind (key uri lname)
899 (klacks:peek-next source)
901 (case key
902 (:start-element
903 (case (find-symbol lname :keyword)
904 (:|param| (push (p/param source) params))
905 (:|except|
906 (setf except (p/except-pattern source))
907 (skip-to-native source)
908 (return))
909 (t (skip-foreign source))))
910 (:end-element
911 (return)))))
912 (setf params (nreverse params))
913 (let* ((dl *datatype-library*)
914 (data-type (cxml-types:find-type
915 (and dl (find-symbol dl :keyword))
916 type
917 params)))
918 (unless data-type
919 (rng-error source "type not found: ~A/~A" type dl))
920 (when (eq data-type :error)
921 (rng-error source "params not valid for type: ~A/~A/~A"
922 type dl params))
923 (make-data
924 :type data-type
925 :params params
926 :except except)))))
928 (defun p/param (source)
929 (klacks:expecting-element (source "param")
930 (let ((name (ntc "name" source))
931 (string (consume-and-parse-characters source)))
932 (cxml-types:make-param name string))))
934 (defun p/except-pattern (source)
935 (klacks:expecting-element (source "except")
936 (with-library-and-ns (klacks:list-attributes source)
937 (klacks:consume source)
938 (choice-ify (p/pattern+ source)))))
940 (defun p/not-allowed (source)
941 (klacks:expecting-element (source "notAllowed")
942 (consume-and-skip-to-native source)
943 (make-not-allowed)))
945 (defun safe-parse-uri (source str &optional base)
946 (when (zerop (length str))
947 (rng-error source "missing URI"))
948 (let* ((compactp (rnc-uri-p str))
949 (str (if compactp (follow-rnc-uri str) str))
950 (uri
951 (handler-case
952 (if base
953 (puri:merge-uris str base)
954 (puri:parse-uri str))
955 (puri:uri-parse-error ()
956 (rng-error source "invalid URI: ~A" str)))))
957 (when (and (eq (puri:uri-scheme uri) :file)
958 (puri:uri-fragment uri))
959 (rng-error source "Forbidden fragment in URI: ~A" str))
960 (values uri compactp)))
962 (defun named-string-xstream (str uri)
963 (let ((xstream (cxml::string->xstream str)))
964 (setf (cxml::xstream-name xstream)
965 (cxml::make-stream-name
966 :entity-name "main document"
967 :entity-kind :main
968 :uri uri))
969 xstream))
971 (defun xstream-open-schema (uri compactp)
972 (if compactp
973 (named-string-xstream
974 (uncompact-file
975 ;; fixme: Hier waere es schon, mit *entity-resolver* arbeiten
976 ;; zu koennen, aber der liefert binaere Streams.
977 (open (cxml::uri-to-pathname uri)
978 :element-type 'character
979 :direction :input))
980 uri)
981 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
983 (defun p/external-ref (source)
984 (klacks:expecting-element (source "externalRef")
985 (let* ((href
986 (escape-uri (attribute "href" (klacks:list-attributes source))))
987 (base (klacks:current-xml-base source)))
988 (multiple-value-bind (uri compactp)
989 (safe-parse-uri source href base)
990 (when (find uri *include-uri-stack* :test #'puri:uri=)
991 (rng-error source "looping include"))
992 (prog1
993 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
994 (xstream (xstream-open-schema uri compactp)))
995 (klacks:with-open-source
996 (source (make-schema-source xstream))
997 (invoke-with-klacks-handler
998 (lambda ()
999 (klacks:find-event source :start-element)
1000 (let ((*datatype-library* ""))
1001 (p/pattern source)))
1002 source)))
1003 (skip-foreign* source))))))
1005 (defun p/grammar (source &optional grammar)
1006 (klacks:expecting-element (source "grammar")
1007 (consume-and-skip-to-native source)
1008 (let ((*grammar* (or grammar (make-grammar *grammar*)))
1009 (includep grammar))
1010 (process-grammar-content* source)
1011 (unless (or includep (grammar-start *grammar*))
1012 (rng-error source "no <start> in grammar"))
1013 (unless includep
1014 (check-pattern-definitions source *grammar*)
1015 (defn-child (grammar-start *grammar*))))))
1017 (defvar *include-start*)
1018 (defvar *include-definitions*)
1020 (defun process-grammar-content* (source &key disallow-include)
1021 (loop
1022 (multiple-value-bind (key uri lname) (klacks:peek source)
1024 (ecase key
1025 (:characters
1026 (klacks:consume source))
1027 (:start-element
1028 (with-library-and-ns (klacks:list-attributes source)
1029 (case (find-symbol lname :keyword)
1030 (:|start|
1031 (process-start source))
1032 (:|define| (process-define source))
1033 (:|div| (process-div source))
1034 (:|include|
1035 (when disallow-include
1036 (rng-error source "nested include not permitted"))
1037 (process-include source))
1039 (skip-foreign source)))))
1040 (:end-element
1041 (return))))))
1043 (defun process-start (source)
1044 (klacks:expecting-element (source "start")
1045 (let* ((combine0 (ntc "combine" source))
1046 (combine
1047 (when combine0
1048 (find-symbol (string-upcase combine0) :keyword)))
1049 (child
1050 (progn
1051 (consume-and-skip-to-native source)
1052 (p/pattern source)))
1053 (pdefinition (grammar-start *grammar*)))
1054 (skip-foreign* source)
1055 ;; fixme: shared code with process-define
1056 (unless pdefinition
1057 (setf pdefinition (make-definition :name :start :child nil))
1058 (setf (grammar-start *grammar*) pdefinition))
1059 (when *include-body-p*
1060 (setf *include-start* pdefinition))
1061 (cond
1062 ((defn-child pdefinition)
1063 (ecase (defn-redefinition pdefinition)
1064 (:not-being-redefined
1065 (when (and combine
1066 (defn-combine-method pdefinition)
1067 (not (eq combine
1068 (defn-combine-method pdefinition))))
1069 (rng-error source "conflicting combine values for <start>"))
1070 (unless combine
1071 (when (defn-head-p pdefinition)
1072 (rng-error source "multiple definitions for <start>"))
1073 (setf (defn-head-p pdefinition) t))
1074 (unless (defn-combine-method pdefinition)
1075 (setf (defn-combine-method pdefinition) combine))
1076 (setf (defn-child pdefinition)
1077 (case (defn-combine-method pdefinition)
1078 (:choice
1079 (make-choice (defn-child pdefinition) child))
1080 (:interleave
1081 (make-interleave (defn-child pdefinition) child)))))
1082 (:being-redefined-and-no-original
1083 (setf (defn-redefinition pdefinition)
1084 :being-redefined-and-original))
1085 (:being-redefined-and-original)))
1087 (setf (defn-child pdefinition) child)
1088 (setf (defn-combine-method pdefinition) combine)
1089 (setf (defn-head-p pdefinition) (null combine))
1090 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
1092 (defun zip (constructor children)
1093 (cond
1094 ((null children)
1095 (rng-error nil "empty choice?"))
1096 ((null (cdr children))
1097 (car children))
1099 (destructuring-bind (a b &rest rest)
1100 children
1101 (zip constructor (cons (funcall constructor a b) rest))))))
1103 (defun choice-ify (children) (zip #'make-choice children))
1104 (defun groupify (children) (zip #'make-group children))
1105 (defun interleave-ify (children) (zip #'make-interleave children))
1107 (defun find-definition (name &optional (grammar *grammar*))
1108 (gethash name (grammar-definitions grammar)))
1110 (defun (setf find-definition) (newval name &optional (grammar *grammar*))
1111 (setf (gethash name (grammar-definitions grammar)) newval))
1113 (defun process-define (source)
1114 (klacks:expecting-element (source "define")
1115 (let* ((name (ntc "name" source))
1116 (combine0 (ntc "combine" source))
1117 (combine (when combine0
1118 (find-symbol (string-upcase combine0) :keyword)))
1119 (child (groupify
1120 (progn
1121 (consume-and-skip-to-native source)
1122 (p/pattern+ source))))
1123 (pdefinition (find-definition name)))
1124 (unless pdefinition
1125 (setf pdefinition (make-definition :name name :child nil))
1126 (setf (find-definition name) pdefinition))
1127 (when *include-body-p*
1128 (push pdefinition *include-definitions*))
1129 (cond
1130 ((defn-child pdefinition)
1131 (case (defn-redefinition pdefinition)
1132 (:not-being-redefined
1133 (when (and combine
1134 (defn-combine-method pdefinition)
1135 (not (eq combine
1136 (defn-combine-method pdefinition))))
1137 (rng-error source "conflicting combine values for ~A" name))
1138 (unless combine
1139 (when (defn-head-p pdefinition)
1140 (rng-error source "multiple definitions for ~A" name))
1141 (setf (defn-head-p pdefinition) t))
1142 (unless (defn-combine-method pdefinition)
1143 (setf (defn-combine-method pdefinition) combine))
1144 (setf (defn-child pdefinition)
1145 (case (defn-combine-method pdefinition)
1146 (:choice
1147 (make-choice (defn-child pdefinition) child))
1148 (:interleave
1149 (make-interleave (defn-child pdefinition) child)))))
1150 (:being-redefined-and-no-original
1151 (setf (defn-redefinition pdefinition)
1152 :being-redefined-and-original))
1153 (:being-redefined-and-original)))
1155 (setf (defn-child pdefinition) child)
1156 (setf (defn-combine-method pdefinition) combine)
1157 (setf (defn-head-p pdefinition) (null combine))
1158 (setf (defn-redefinition pdefinition) :not-being-redefined))))))
1160 (defun process-div (source)
1161 (klacks:expecting-element (source "div")
1162 (consume-and-skip-to-native source)
1163 (process-grammar-content* source)))
1165 (defun reset-definition-for-include (defn)
1166 (setf (defn-combine-method defn) nil)
1167 (setf (defn-redefinition defn) :being-redefined-and-no-original)
1168 (setf (defn-head-p defn) nil))
1170 (defun restore-definition (defn original)
1171 (setf (defn-combine-method defn) (defn-combine-method original))
1172 (setf (defn-redefinition defn) (defn-redefinition original))
1173 (setf (defn-head-p defn) (defn-head-p original)))
1175 (defun process-include (source)
1176 (klacks:expecting-element (source "include")
1177 (let* ((href
1178 (escape-uri (attribute "href" (klacks:list-attributes source))))
1179 (base (klacks:current-xml-base source))
1180 (*include-start* nil)
1181 (*include-definitions* '()))
1182 (multiple-value-bind (uri compactp)
1183 (safe-parse-uri source href base)
1184 (consume-and-skip-to-native source)
1185 (let ((*include-body-p* t))
1186 (process-grammar-content* source :disallow-include t))
1187 (let ((tmp-start
1188 (when *include-start*
1189 (prog1
1190 (copy-structure *include-start*)
1191 (reset-definition-for-include *include-start*))))
1192 (tmp-defns
1193 (loop
1194 for defn in *include-definitions*
1195 collect
1196 (prog1
1197 (copy-structure defn)
1198 (reset-definition-for-include defn)))))
1199 (when (find uri *include-uri-stack* :test #'puri:uri=)
1200 (rng-error source "looping include"))
1201 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
1202 (xstream (xstream-open-schema uri compactp)))
1203 (klacks:with-open-source (source (make-schema-source xstream))
1204 (invoke-with-klacks-handler
1205 (lambda ()
1206 (klacks:find-event source :start-element)
1207 (let ((*datatype-library* ""))
1208 (p/grammar source *grammar*)))
1209 source))
1210 (when tmp-start
1211 (when (eq (defn-redefinition *include-start*)
1212 :being-redefined-and-no-original)
1213 (rng-error source "start not found in redefinition of grammar"))
1214 (restore-definition *include-start* tmp-start))
1215 (dolist (copy tmp-defns)
1216 (let ((defn (gethash (defn-name copy)
1217 (grammar-definitions *grammar*))))
1218 (when (eq (defn-redefinition defn)
1219 :being-redefined-and-no-original)
1220 (rng-error source "redefinition not found in grammar"))
1221 (restore-definition defn copy)))
1222 nil))))))
1224 (defun check-pattern-definitions (source grammar)
1225 (when (and (grammar-start grammar)
1226 (eq (defn-redefinition (grammar-start grammar))
1227 :being-redefined-and-no-original))
1228 (rng-error source "start not found in redefinition of grammar"))
1229 (loop for defn being each hash-value in (grammar-definitions grammar) do
1230 (when (eq (defn-redefinition defn) :being-redefined-and-no-original)
1231 (rng-error source "redefinition not found in grammar"))
1232 (unless (defn-child defn)
1233 (rng-error source "unresolved reference to ~A" (defn-name defn)))))
1235 (defvar *any-name-allowed-p* t)
1236 (defvar *ns-name-allowed-p* t)
1238 (defun destructure-name (source qname)
1239 (multiple-value-bind (uri lname)
1240 (klacks:decode-qname qname source)
1241 (setf uri (or uri *namespace-uri*))
1242 (when (and *attribute-namespace-p*
1243 (or (and (equal lname "xmlns") (equal uri ""))
1244 (equal uri "http://www.w3.org/2000/xmlns")))
1245 (rng-error source "namespace attribute not permitted"))
1246 (make-name uri lname)))
1248 (defun p/name-class (source)
1249 (klacks:expecting-element (source)
1250 (with-library-and-ns (klacks:list-attributes source)
1251 (case (find-symbol (klacks:current-lname source) :keyword)
1252 (:|name|
1253 (let ((qname (string-trim *whitespace*
1254 (consume-and-parse-characters source))))
1255 (destructure-name source qname)))
1256 (:|anyName|
1257 (unless *any-name-allowed-p*
1258 (rng-error source "anyname not permitted in except"))
1259 (klacks:consume source)
1260 (prog1
1261 (let ((*any-name-allowed-p* nil))
1262 (make-any-name (p/except-name-class? source)))
1263 (skip-to-native source)))
1264 (:|nsName|
1265 (unless *ns-name-allowed-p*
1266 (rng-error source "nsname not permitted in except"))
1267 (let ((uri *namespace-uri*)
1268 (*any-name-allowed-p* nil)
1269 (*ns-name-allowed-p* nil))
1270 (when (and *attribute-namespace-p*
1271 (equal uri "http://www.w3.org/2000/xmlns"))
1272 (rng-error source "namespace attribute not permitted"))
1273 (klacks:consume source)
1274 (prog1
1275 (make-ns-name uri (p/except-name-class? source))
1276 (skip-to-native source))))
1277 (:|choice|
1278 (klacks:consume source)
1279 (simplify-nc-choice (p/name-class* source)))
1281 (rng-error source "invalid child in except"))))))
1283 (defun p/name-class* (source)
1284 (let ((results nil))
1285 (loop
1286 (skip-to-native source)
1287 (case (klacks:peek source)
1288 (:characters
1289 (klacks:consume source))
1290 (:start-element
1291 (push (p/name-class source) results))
1292 (:end-element
1293 (return))))
1294 (nreverse results)))
1296 (defun p/except-name-class? (source)
1297 (skip-to-native source)
1298 (multiple-value-bind (key uri lname)
1299 (klacks:peek source)
1301 (if (and (eq key :start-element)
1302 (string= (find-symbol lname :keyword) "except"))
1303 (p/except-name-class source)
1304 nil)))
1306 (defun p/except-name-class (source)
1307 (klacks:expecting-element (source "except")
1308 (with-library-and-ns (klacks:list-attributes source)
1309 (klacks:consume source)
1310 (let ((x (p/name-class* source)))
1311 (if (cdr x)
1312 (simplify-nc-choice x)
1313 (car x))))))
1315 (defun escape-uri (string)
1316 (with-output-to-string (out)
1317 (loop for c across (cxml::rod-to-utf8-string string) do
1318 (let ((code (char-code c)))
1319 ;; http://www.w3.org/TR/xlink/#link-locators
1320 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
1321 (format out "%~2,'0X" code)
1322 (write-char c out))))))
1325 ;;;; unparsing
1327 (defvar *definitions-to-names*)
1328 (defvar *seen-names*)
1330 (defun serialization-name (defn)
1331 (or (gethash defn *definitions-to-names*)
1332 (setf (gethash defn *definitions-to-names*)
1333 (let ((name (if (gethash (defn-name defn) *seen-names*)
1334 (format nil "~A-~D"
1335 (defn-name defn)
1336 (hash-table-count *seen-names*))
1337 (defn-name defn))))
1338 (setf (gethash name *seen-names*) defn)
1339 name))))
1341 (defun serialize-schema (schema sink)
1342 "@arg[schema]{a Relax NG @class{schema}}
1343 @arg[sink]{a SAX handler}
1344 @return{the result of @code{sax:end-document}}
1345 @short{This function serializes a parsed Relax NG back into XML syntax.}
1347 Note that the schema represented in memory has gone through simplification
1348 as is textually different from the original XML document.
1350 @see{parse-schema}"
1351 (cxml:with-xml-output sink
1352 (let ((*definitions-to-names* (make-hash-table))
1353 (*seen-names* (make-hash-table :test 'equal)))
1354 (cxml:with-element "grammar"
1355 (cxml:with-element "start"
1356 (serialize-pattern (schema-start schema)))
1357 (loop for defn being each hash-key in *definitions-to-names* do
1358 (serialize-definition defn))))))
1360 (defun serialize-pattern (pattern)
1361 (etypecase pattern
1362 (element
1363 (cxml:with-element "element"
1364 (serialize-name (pattern-name pattern))
1365 (serialize-pattern (pattern-child pattern))))
1366 (attribute
1367 (cxml:with-element "attribute"
1368 (serialize-name (pattern-name pattern))
1369 (serialize-pattern (pattern-child pattern))))
1370 (%combination
1371 (cxml:with-element
1372 (etypecase pattern
1373 (group "group")
1374 (interleave "interleave")
1375 (choice "choice"))
1376 (serialize-pattern (pattern-a pattern))
1377 (serialize-pattern (pattern-b pattern))))
1378 (one-or-more
1379 (cxml:with-element "oneOrMore"
1380 (serialize-pattern (pattern-child pattern))))
1381 (list-pattern
1382 (cxml:with-element "list"
1383 (serialize-pattern (pattern-child pattern))))
1384 (ref
1385 (cxml:with-element "ref"
1386 (cxml:attribute "name" (serialization-name (pattern-target pattern)))))
1387 (empty
1388 (cxml:with-element "empty"))
1389 (not-allowed
1390 (cxml:with-element "notAllowed"))
1391 (text
1392 (cxml:with-element "text"))
1393 (value
1394 (cxml:with-element "value"
1395 (let ((type (pattern-type pattern)))
1396 (cxml:attribute "datatype-library"
1397 (symbol-name (cxml-types:type-library type)))
1398 (cxml:attribute "type" (cxml-types:type-name type)))
1399 (cxml:attribute "ns" (pattern-ns pattern))
1400 (cxml:text (pattern-string pattern))))
1401 (data
1402 (cxml:with-element "value"
1403 (let ((type (pattern-type pattern)))
1404 (cxml:attribute "datatype-library"
1405 (symbol-name (cxml-types:type-library type)))
1406 (cxml:attribute "type" (cxml-types:type-name type)))
1407 (dolist (param (pattern-params pattern))
1408 (cxml:with-element "param"
1409 (cxml:attribute "name" (cxml-types:param-name param))
1410 (cxml:text (cxml-types:param-value param))))
1411 (when (pattern-except pattern)
1412 (cxml:with-element "except"
1413 (serialize-pattern (pattern-except pattern))))))))
1415 (defun serialize-definition (defn)
1416 (cxml:with-element "define"
1417 (cxml:attribute "name" (serialization-name defn))
1418 (serialize-pattern (defn-child defn))))
1420 (defun serialize-name (name)
1421 (etypecase name
1422 (name
1423 (cxml:with-element "name"
1424 (cxml:attribute "ns" (name-uri name))
1425 (cxml:text (name-lname name))))
1426 (any-name
1427 (cxml:with-element "anyName"
1428 (when (any-name-except name)
1429 (serialize-except-name (any-name-except name)))))
1430 (ns-name
1431 (cxml:with-element "anyName"
1432 (cxml:attribute "ns" (ns-name-uri name))
1433 (when (ns-name-except name)
1434 (serialize-except-name (ns-name-except name)))))
1435 (name-class-choice
1436 (cxml:with-element "choice"
1437 (serialize-name (name-class-choice-a name))
1438 (serialize-name (name-class-choice-b name))))))
1440 (defun serialize-except-name (spec)
1441 (cxml:with-element "except"
1442 (serialize-name spec)))
1445 ;;;; simplification
1447 ;;; 4.1 Annotations
1448 ;;; Foreign attributes and elements are removed implicitly while parsing.
1450 ;;; 4.2 Whitespace
1451 ;;; All character data is discarded while parsing (which can only be
1452 ;;; whitespace after validation).
1454 ;;; Whitespace in name, type, and combine attributes is stripped while
1455 ;;; parsing. Ditto for <name/>.
1457 ;;; 4.3. datatypeLibrary attribute
1458 ;;; Escaping is done by p/pattern.
1459 ;;; Attribute value defaulting is done using *datatype-library*; only
1460 ;;; p/data and p/value record the computed value.
1462 ;;; 4.4. type attribute of value element
1463 ;;; Done by p/value.
1465 ;;; 4.5. href attribute
1466 ;;; Escaping is done by process-include and p/external-ref.
1468 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
1469 ;;; but that requires xstream hacking.
1471 ;;; 4.6. externalRef element
1472 ;;; Done by p/external-ref.
1474 ;;; 4.7. include element
1475 ;;; Done by process-include.
1477 ;;; 4.8. name attribute of element and attribute elements
1478 ;;; `name' is stored as a slot, not a child. Done by p/element and
1479 ;;; p/attribute.
1481 ;;; 4.9. ns attribute
1482 ;;; done by p/name-class, p/value, p/element, p/attribute
1484 ;;; 4.10. QNames
1485 ;;; done by p/name-class
1487 ;;; 4.11. div element
1488 ;;; Legen wir gar nicht erst an.
1490 ;;; 4.12. 4.13 4.14 4.15
1491 ;;; beim anlegen
1493 ;;; 4.16
1494 ;;; p/name-class
1495 ;;; -- ausser der sache mit den datentypen
1497 ;;; 4.17, 4.18, 4.19
1498 ;;; Ueber die Grammar-und Definition Objekte, wie von James Clark
1499 ;;; beschrieben.
1501 ;;; Dabei werden keine Umbenennungen vorgenommen, weil Referenzierung
1502 ;;; durch Aufbei der Graphenstruktur zwischen ref und Definition
1503 ;;; erfolgt und Namen dann bereits aufgeloest sind. Wir benennen
1504 ;;; dafuer beim Serialisieren um.
1506 (defmethod check-recursion ((pattern element) depth)
1507 (check-recursion (pattern-child pattern) (1+ depth)))
1509 (defmethod check-recursion ((pattern ref) depth)
1510 (when (eql (pattern-crdepth pattern) depth)
1511 (rng-error nil "infinite recursion in ~A"
1512 (defn-name (pattern-target pattern))))
1513 (when (null (pattern-crdepth pattern))
1514 (setf (pattern-crdepth pattern) depth)
1515 (check-recursion (defn-child (pattern-target pattern)) depth)
1516 (setf (pattern-crdepth pattern) t)))
1518 (defmethod check-recursion ((pattern %parent) depth)
1519 (check-recursion (pattern-child pattern) depth))
1521 (defmethod check-recursion ((pattern %combination) depth)
1522 (check-recursion (pattern-a pattern) depth)
1523 (check-recursion (pattern-b pattern) depth))
1525 (defmethod check-recursion ((pattern %leaf) depth)
1526 (declare (ignore depth)))
1528 (defmethod check-recursion ((pattern data) depth)
1529 (when (pattern-except pattern)
1530 (check-recursion (pattern-except pattern) depth)))
1533 ;;;; 4.20
1535 ;;; %PARENT
1537 (defmethod fold-not-allowed ((pattern element))
1538 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1539 pattern)
1541 (defmethod fold-not-allowed ((pattern %parent))
1542 (setf (pattern-child pattern) (fold-not-allowed (pattern-child pattern)))
1543 (if (typep (pattern-child pattern) 'not-allowed)
1544 (pattern-child pattern)
1545 pattern))
1547 ;;; %COMBINATION
1549 (defmethod fold-not-allowed ((pattern %combination))
1550 (setf (pattern-a pattern) (fold-not-allowed (pattern-a pattern)))
1551 (setf (pattern-b pattern) (fold-not-allowed (pattern-b pattern)))
1552 pattern)
1554 (defmethod fold-not-allowed ((pattern group))
1555 (call-next-method)
1556 (cond
1557 ;; remove if any child is not allowed
1558 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1559 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1560 (t pattern)))
1562 (defmethod fold-not-allowed ((pattern interleave))
1563 (call-next-method)
1564 (cond
1565 ;; remove if any child is not allowed
1566 ((typep (pattern-a pattern) 'not-allowed) (pattern-a pattern))
1567 ((typep (pattern-b pattern) 'not-allowed) (pattern-b pattern))
1568 (t pattern)))
1570 (defmethod fold-not-allowed ((pattern choice))
1571 (call-next-method)
1572 (cond
1573 ;; if any child is not allowed, choose the other
1574 ((typep (pattern-a pattern) 'not-allowed) (pattern-b pattern))
1575 ((typep (pattern-b pattern) 'not-allowed) (pattern-a pattern))
1576 (t pattern)))
1578 ;;; LEAF
1580 (defmethod fold-not-allowed ((pattern %leaf))
1581 pattern)
1583 (defmethod fold-not-allowed ((pattern data))
1584 (when (pattern-except pattern)
1585 (setf (pattern-except pattern) (fold-not-allowed (pattern-except pattern)))
1586 (when (typep (pattern-except pattern) 'not-allowed)
1587 (setf (pattern-except pattern) nil)))
1588 pattern)
1590 ;;; REF
1592 (defmethod fold-not-allowed ((pattern ref))
1593 pattern)
1596 ;;;; 4.21
1598 ;;; %PARENT
1600 (defmethod fold-empty ((pattern one-or-more))
1601 (call-next-method)
1602 (if (typep (pattern-child pattern) 'empty)
1603 (pattern-child pattern)
1604 pattern))
1606 (defmethod fold-empty ((pattern %parent))
1607 (setf (pattern-child pattern) (fold-empty (pattern-child pattern)))
1608 pattern)
1610 ;;; %COMBINATION
1612 (defmethod fold-empty ((pattern %combination))
1613 (setf (pattern-a pattern) (fold-empty (pattern-a pattern)))
1614 (setf (pattern-b pattern) (fold-empty (pattern-b pattern)))
1615 pattern)
1617 (defmethod fold-empty ((pattern group))
1618 (call-next-method)
1619 (cond
1620 ;; if any child is empty, choose the other
1621 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1622 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1623 (t pattern)))
1625 (defmethod fold-empty ((pattern interleave))
1626 (call-next-method)
1627 (cond
1628 ;; if any child is empty, choose the other
1629 ((typep (pattern-a pattern) 'empty) (pattern-b pattern))
1630 ((typep (pattern-b pattern) 'empty) (pattern-a pattern))
1631 (t pattern)))
1633 (defmethod fold-empty ((pattern choice))
1634 (call-next-method)
1635 (if (typep (pattern-b pattern) 'empty)
1636 (cond
1637 ((typep (pattern-a pattern) 'empty)
1638 (pattern-a pattern))
1640 (rotatef (pattern-a pattern) (pattern-b pattern))
1641 pattern))
1642 pattern))
1644 ;;; LEAF
1646 (defmethod fold-empty ((pattern %leaf))
1647 pattern)
1649 (defmethod fold-empty ((pattern data))
1650 (when (pattern-except pattern)
1651 (setf (pattern-except pattern) (fold-empty (pattern-except pattern))))
1652 pattern)
1654 ;;; REF
1656 (defmethod fold-empty ((pattern ref))
1657 pattern)
1660 ;;;; name class overlap
1662 ;;; fixme: memorize this stuff?
1664 (defparameter !uri (string (code-char 1)))
1665 (defparameter !lname "")
1667 (defun classes-overlap-p (nc1 nc2)
1668 (flet ((both-contain (x)
1669 (and (contains nc1 (car x) (cdr x))
1670 (contains nc2 (car x) (cdr x)))))
1671 (or (some #'both-contain (representatives nc1))
1672 (some #'both-contain (representatives nc2)))))
1674 (defmethod representatives ((nc any-name))
1675 (cons (cons !uri !lname)
1676 (if (any-name-except nc)
1677 (representatives (any-name-except nc))
1678 nil)))
1680 (defmethod representatives ((nc ns-name))
1681 (cons (cons (ns-name-uri nc) !lname)
1682 (if (ns-name-except nc)
1683 (representatives (ns-name-except nc))
1684 nil)))
1686 (defmethod representatives ((nc name))
1687 (list (cons (name-uri nc) (name-lname nc))))
1689 (defmethod representatives ((nc name-class-choice))
1690 (nconc (representatives (name-class-choice-a nc))
1691 (representatives (name-class-choice-b nc))))
1694 ;;;; 7.1
1696 (defun finalize-definitions (pattern)
1697 (let ((defns (make-hash-table)))
1698 (labels ((recurse (p)
1699 (cond
1700 ((typep p 'ref)
1701 (let ((target (pattern-target p)))
1702 (unless (gethash target defns)
1703 (setf (gethash target defns) t)
1704 (setf (defn-child target) (recurse (defn-child target))))
1705 (if (typep (defn-child target) 'element)
1707 (copy-pattern-tree (defn-child target)))))
1709 (etypecase p
1710 (data
1711 (when (pattern-except p)
1712 (setf (pattern-except p) (recurse (pattern-except p)))))
1713 (%parent
1714 (setf (pattern-child p) (recurse (pattern-child p))))
1715 (%combination
1716 (setf (pattern-a p) (recurse (pattern-a p)))
1717 (setf (pattern-b p) (recurse (pattern-b p))))
1718 (%leaf))
1719 p))))
1720 (values
1721 (recurse pattern)
1722 (loop
1723 for defn being each hash-key in defns
1724 collect defn)))))
1726 (defun copy-pattern-tree (pattern)
1727 (labels ((recurse (p)
1728 (let ((q (copy-structure p)))
1729 (etypecase p
1730 (data
1731 (when (pattern-except p)
1732 (setf (pattern-except q) (recurse (pattern-except p)))))
1733 (%parent
1734 (setf (pattern-child q) (recurse (pattern-child p))))
1735 (%combination
1736 (setf (pattern-a q) (recurse (pattern-a p)))
1737 (setf (pattern-b q) (recurse (pattern-b p))))
1738 ((or %leaf ref)))
1739 q)))
1740 (recurse pattern)))
1742 (defparameter *in-attribute-p* nil)
1743 (defparameter *in-one-or-more-p* nil)
1744 (defparameter *in-one-or-more//group-or-interleave-p* nil)
1745 (defparameter *in-list-p* nil)
1746 (defparameter *in-data-except-p* nil)
1747 (defparameter *in-start-p* nil)
1749 (defun check-start-restrictions (pattern)
1750 (let ((*in-start-p* t))
1751 (check-restrictions pattern)))
1753 (defun content-type-max (a b)
1754 (if (and a b)
1755 (cond
1756 ((eq a :empty) b)
1757 ((eq b :empty) a)
1758 ((eq a :complex) b)
1759 (:simple))
1760 nil))
1762 (defun groupable-max (a b)
1763 (if (or (eq a :empty)
1764 (eq b :empty)
1765 (and (eq a :complex)
1766 (eq b :complex)))
1767 (content-type-max a b)
1768 nil))
1770 (defun assert-name-class-finite (nc)
1771 (etypecase nc
1772 ((or any-name ns-name)
1773 (rng-error nil "infinite attribute name class outside of one-or-more"))
1774 (name)
1775 (name-class-choice
1776 (assert-name-class-finite (name-class-choice-a nc))
1777 (assert-name-class-finite (name-class-choice-b nc)))))
1779 (defmethod check-restrictions ((pattern attribute))
1780 (when *in-attribute-p*
1781 (rng-error nil "nested attribute not allowed"))
1782 (when *in-one-or-more//group-or-interleave-p*
1783 (rng-error nil "attribute not allowed in oneOrMore//group, oneOrMore//interleave"))
1784 (when *in-list-p*
1785 (rng-error nil "attribute in list not allowed"))
1786 (when *in-data-except-p*
1787 (rng-error nil "attribute in data/except not allowed"))
1788 (when *in-start-p*
1789 (rng-error nil "attribute in start not allowed"))
1790 (let ((*in-attribute-p* t))
1791 (unless *in-one-or-more-p*
1792 (assert-name-class-finite (pattern-name pattern)))
1793 (values (if (check-restrictions (pattern-child pattern))
1794 :empty
1795 nil)
1796 (list (pattern-name pattern))
1797 nil)))
1799 (defmethod check-restrictions ((pattern ref))
1800 (when *in-attribute-p*
1801 (rng-error nil "ref in attribute not allowed"))
1802 (when *in-list-p*
1803 (rng-error nil "ref in list not allowed"))
1804 (when *in-data-except-p*
1805 (rng-error nil "ref in data/except not allowed"))
1806 (values :complex
1808 (list (pattern-name (defn-child (pattern-target pattern))))
1809 nil))
1811 (defmethod check-restrictions ((pattern one-or-more))
1812 (when *in-data-except-p*
1813 (rng-error nil "oneOrMore in data/except not allowed"))
1814 (when *in-start-p*
1815 (rng-error nil "one-or-more in start not allowed"))
1816 (let* ((*in-one-or-more-p* t))
1817 (multiple-value-bind (x a e textp)
1818 (check-restrictions (pattern-child pattern))
1819 (values (groupable-max x x) a e textp))))
1821 (defmethod check-restrictions ((pattern group))
1822 (when *in-data-except-p*
1823 (rng-error nil "group in data/except not allowed"))
1824 (when *in-start-p*
1825 (rng-error nil "group in start not allowed"))
1826 (let ((*in-one-or-more//group-or-interleave-p*
1827 *in-one-or-more-p*))
1828 (multiple-value-bind (x a e tp) (check-restrictions (pattern-a pattern))
1829 (multiple-value-bind (y b f tq) (check-restrictions (pattern-b pattern))
1830 (dolist (nc1 a)
1831 (dolist (nc2 b)
1832 (when (classes-overlap-p nc1 nc2)
1833 (rng-error nil "attribute name overlap in group: ~A ~A"
1834 nc1 nc2))))
1835 (values (groupable-max x y)
1836 (append a b)
1837 (append e f)
1838 (or tp tq))))))
1840 (defmethod check-restrictions ((pattern interleave))
1841 (when *in-list-p*
1842 (rng-error nil "interleave in list not allowed"))
1843 (when *in-data-except-p*
1844 (rng-error nil "interleave in data/except not allowed"))
1845 (when *in-start-p*
1846 (rng-error nil "interleave in start not allowed"))
1847 (let ((*in-one-or-more//group-or-interleave-p*
1848 *in-one-or-more-p*))
1849 (multiple-value-bind (x a e tp) (check-restrictions (pattern-a pattern))
1850 (multiple-value-bind (y b f tq) (check-restrictions (pattern-b pattern))
1851 (dolist (nc1 a)
1852 (dolist (nc2 b)
1853 (when (classes-overlap-p nc1 nc2)
1854 (rng-error nil "attribute name overlap in interleave: ~A ~A"
1855 nc1 nc2))))
1856 (dolist (nc1 e)
1857 (dolist (nc2 f)
1858 (when (classes-overlap-p nc1 nc2)
1859 (rng-error nil "element name overlap in interleave: ~A ~A"
1860 nc1 nc2))))
1861 (when (and tp tq)
1862 (rng-error nil "multiple text permitted by interleave"))
1863 (values (groupable-max x y)
1864 (append a b)
1865 (append e f)
1866 (or tp tq))))))
1868 (defmethod check-restrictions ((pattern choice))
1869 (multiple-value-bind (x a e tp) (check-restrictions (pattern-a pattern))
1870 (multiple-value-bind (y b f tq) (check-restrictions (pattern-b pattern))
1871 (values (content-type-max x y)
1872 (append a b)
1873 (append e f)
1874 (or tp tq)))))
1876 (defmethod check-restrictions ((pattern list-pattern))
1877 (when *in-list-p*
1878 (rng-error nil "nested list not allowed"))
1879 (when *in-data-except-p*
1880 (rng-error nil "list in data/except not allowed"))
1881 (let ((*in-list-p* t))
1882 (check-restrictions (pattern-child pattern)))
1883 (when *in-start-p*
1884 (rng-error nil "list in start not allowed"))
1885 :simple)
1887 (defmethod check-restrictions ((pattern text))
1888 (when *in-list-p*
1889 (rng-error nil "text in list not allowed"))
1890 (when *in-data-except-p*
1891 (rng-error nil "text in data/except not allowed"))
1892 (when *in-start-p*
1893 (rng-error nil "text in start not allowed"))
1894 (values :complex nil nil t))
1896 (defmethod check-restrictions ((pattern data))
1897 (when *in-start-p*
1898 (rng-error nil "data in start not allowed"))
1899 (when (pattern-except pattern)
1900 (let ((*in-data-except-p* t))
1901 (check-restrictions (pattern-except pattern))))
1902 :simple)
1904 (defmethod check-restrictions ((pattern value))
1905 (when *in-start-p*
1906 (rng-error nil "value in start not allowed"))
1907 :simple)
1909 (defmethod check-restrictions ((pattern empty))
1910 (when *in-data-except-p*
1911 (rng-error nil "empty in data/except not allowed"))
1912 (when *in-start-p*
1913 (rng-error nil "empty in start not allowed"))
1914 :empty)
1916 (defmethod check-restrictions ((pattern element))
1917 (unless (check-restrictions (pattern-child pattern))
1918 (rng-error nil "restrictions on string sequences violated")))
1920 (defmethod check-restrictions ((pattern not-allowed))
1921 nil)