1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; An implementation of James Clark's algorithm for RELAX NG validation.
4 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 (in-package :cxml-rng
)
33 (defvar *empty
* (make-empty))
34 (defvar *not-allowed
* (make-not-allowed))
37 (defun make-validator (grammar)
38 (let* ((table (ensure-registratur grammar
))
39 (start (schema-interned-start grammar
))
41 (make-instance 'validator
43 :current-pattern start
)))
44 (make-instance 'text-normalizer
:chained-handler validator
)))
49 (defgeneric contains
(nc uri lname
))
51 (defmethod contains ((nc any-name
) uri lname
)
52 (let ((except (any-name-except nc
)))
54 (not (contains except uri lname
))
57 (defmethod contains ((nc ns-name
) uri lname
)
58 (and (equal (ns-name-uri nc
) uri
)
59 (let ((except (ns-name-except nc
)))
61 (not (contains except uri lname
))
64 (defmethod contains ((nc name
) uri lname
)
65 (and (equal (name-uri nc
) uri
)
66 (equal (name-lname nc
) lname
)))
68 (defmethod contains ((nc name-class-choice
) uri lname
)
69 (or (contains (name-class-choice-a nc
) uri lname
)
70 (contains (name-class-choice-b nc
) uri lname
)))
75 (defgeneric nullable
(pattern))
77 (defmethod nullable ((pattern group
))
78 (and (nullable (pattern-a pattern
))
79 (nullable (pattern-b pattern
))))
81 (defmethod nullable ((pattern interleave
))
82 (and (nullable (pattern-a pattern
))
83 (nullable (pattern-b pattern
))))
85 (defmethod nullable ((pattern choice
))
86 (or (nullable (pattern-a pattern
))
87 (nullable (pattern-b pattern
))))
89 (defmethod nullable ((pattern one-or-more
))
90 (nullable (pattern-child pattern
)))
92 (defmethod nullable ((pattern element
)) nil
)
93 (defmethod nullable ((pattern attribute
)) nil
)
94 (defmethod nullable ((pattern list-pattern
)) nil
)
95 (defmethod nullable ((pattern value
)) nil
)
96 (defmethod nullable ((pattern data
)) nil
)
97 (defmethod nullable ((pattern not-allowed
)) nil
)
98 (defmethod nullable ((pattern after
)) nil
)
100 (defmethod nullable ((pattern empty
)) t
)
101 (defmethod nullable ((pattern text
)) t
)
106 (defclass validator
(sax:sax-parser-mixin
107 cxml-types
:sax-validation-context-mixin
)
108 ((current-pattern :initarg
:current-pattern
:accessor current-pattern
)
109 (after-start-tag-p :accessor after-start-tag-p
)
110 (pending-text-node :initform nil
:accessor pending-text-node
)
111 (registratur :initarg
:registratur
:accessor registratur
)))
113 (defun advance (hsx pattern message
)
114 (when (typep pattern
'not-allowed
)
115 (rng-error hsx
"~A, was expecting a ~A"
117 (replace-scary-characters (current-pattern hsx
))))
118 (setf (current-pattern hsx
) pattern
))
120 ;; make sure slime doesn't die
121 (defun replace-scary-characters (pattern)
122 (let ((str (write-to-string pattern
129 when
(>= (char-code c
) 128)
130 do
(setf (elt str i
) #\?))
133 (defmethod sax:characters
((hsx validator
) data
)
134 (assert (null (pending-text-node hsx
))) ;parser must be normalize
135 (if (after-start-tag-p hsx
)
136 (setf (pending-text-node hsx
) data
)
137 (unless (whitespacep data
)
138 ;; we already saw an element sibling, so discard whitespace
140 (text\' hsx
(current-pattern hsx
) data
)
141 "text node not valid")))
142 (setf (after-start-tag-p hsx
) nil
))
144 (defmethod sax:start-element
((hsx validator
) uri lname qname attributes
)
145 (declare (ignore qname
))
146 (when (pending-text-node hsx
)
147 ;; text node was the previous child, and we're in element content.
148 ;; process non-whitespace now; discard whitespace completely
149 (let ((data (pending-text-node hsx
)))
150 (unless (whitespacep data
)
152 (text\' hsx
(current-pattern hsx
) data
)
154 (setf (pending-text-node hsx
) nil
))
156 (remove-if (cxml::compose
#'cxml
::xmlns-attr-p
#'sax
:attribute-qname
)
158 (let* ((p0 (current-pattern hsx
))
159 (p1 (open-start-tag\' hsx p0 uri lname
))
161 (advance hsx p1
"element not valid")
162 (attributes\' hsx p1 attributes
)))
164 (advance hsx p2
"attributes not valid")
165 (close-start-tag\' hsx p2
))))
166 (advance hsx p3
"attributes not valid")
167 (setf (after-start-tag-p hsx
) t
)))
169 (defmethod sax:end-element
((hsx validator
) uri lname qname
)
170 (declare (ignore uri lname qname
))
171 (when (after-start-tag-p hsx
)
172 ;; nothing at all? pretend we saw whitespace.
173 (sax:characters hsx
""))
174 (when (pending-text-node hsx
)
175 ;; text node was the only child?
176 ;; process it and handle whitespace specially
177 (let* ((current (current-pattern hsx
))
178 (data (pending-text-node hsx
))
179 (next (text\' hsx current data
)))
181 (if (whitespacep data
)
182 (intern-choice hsx current next
)
184 "text node not valid"))
185 (setf (pending-text-node hsx
) nil
))
187 (end-tag\' hsx
(current-pattern hsx
))
188 "end of element not valid"))
193 (defgeneric text
\' (handler pattern data
))
195 (defmethod text\' (hsx (pattern choice
) data
)
197 (text\' hsx
(pattern-a pattern
) data
)
198 (text\' hsx
(pattern-b pattern
) data
)))
200 (defmethod text\' (hsx (pattern interleave
) data
)
201 (let ((a (pattern-a pattern
))
202 (b (pattern-b pattern
)))
204 (intern-interleave hsx
(text\' hsx a data
) b
)
205 (intern-interleave hsx a
(text\' hsx b data
)))))
207 (defmethod text\' (hsx (pattern group
) data
)
208 (let* ((a (pattern-a pattern
))
209 (b (pattern-b pattern
))
210 (p (intern-group hsx
(text\' hsx a data
) b
)))
212 (intern-choice hsx p
(text\' hsx b data
))
215 (defmethod text\' (hsx (pattern after
) data
)
217 (text\' hsx
(pattern-a pattern
) data
)
218 (pattern-b pattern
)))
220 (defmethod text\' (hsx (pattern one-or-more
) data
)
221 (let ((child (pattern-child pattern
)))
223 (text\' hsx child data
)
224 (intern-zero-or-more hsx child
))))
226 (defmethod text\' (hsx (pattern text
) data
)
227 (declare (ignore data
))
231 (if ok
*empty
* *not-allowed
*))
233 (defmethod text\' (hsx (pattern value
) data
)
234 (let ((data-type (pattern-type pattern
)))
235 (eat (cxml-types:equal-using-type
237 (pattern-value pattern
)
238 (cxml-types:parse data-type data hsx
)))))
240 (defmethod text\' (hsx (pattern data
) data
)
241 (eat (and (cxml-types:validp
(pattern-type pattern
) data hsx
)
242 (let ((except (pattern-except pattern
)))
243 (not (and except
(nullable (text\' hsx except data
))))))))
245 (defmethod text\' (hsx (pattern list-pattern
) data
)
246 (eat (nullable (list\' hsx
(pattern-child pattern
) (words data
)))))
248 (defmethod text\' (hsx pattern data
)
249 (declare (ignore pattern data
))
252 (defun list\' (hsx pattern words
)
254 (setf pattern
(text\' hsx pattern word
)))
258 (cl-ppcre:split
#.
(format nil
"[~A]+" *whitespace
*)
259 (string-trim *whitespace
* str
)))
264 (defmacro ensuref
(key table value
)
265 `(ensure-hash ,key
,table
(lambda () ,value
)))
267 (defun ensure-hash (key table fn
)
268 (or (gethash key table
)
269 (setf (gethash key table
) (funcall fn
))))
271 (defgeneric intern-choice
(handler a b
))
272 (defmethod intern-choice (hsx a
(b not-allowed
)) a
)
273 (defmethod intern-choice (hsx (a not-allowed
) b
) b
)
274 (defmethod intern-choice (hsx a b
)
275 (ensuref (list 'choice a b
) (registratur hsx
) (make-choice a b
)))
277 (defgeneric intern-group
(handler a b
))
278 (defmethod intern-group (hsx (a pattern
) (b not-allowed
)) b
)
279 (defmethod intern-group (hsx (a not-allowed
) (b pattern
)) a
)
280 (defmethod intern-group (hsx a
(b empty
)) a
)
281 (defmethod intern-group (hsx (a empty
) b
) b
)
282 (defmethod intern-group (hsx a b
)
283 (ensuref (list 'group a b
) (registratur hsx
) (make-group a b
)))
285 (defgeneric intern-interleave
(handler a b
))
286 (defmethod intern-interleave (hsx (a pattern
) (b not-allowed
)) b
)
287 (defmethod intern-interleave (hsx (a not-allowed
) (b pattern
)) a
)
288 (defmethod intern-interleave (hsx a
(b empty
)) a
)
289 (defmethod intern-interleave (hsx (a empty
) b
) b
)
290 (defmethod intern-interleave (hsx a b
)
291 (ensuref (list 'interleave a b
) (registratur hsx
) (make-interleave a b
)))
293 (defgeneric intern-after
(handler a b
))
294 (defmethod intern-after (hsx (a pattern
) (b not-allowed
)) b
)
295 (defmethod intern-after (hsx (a not-allowed
) (b pattern
)) a
)
296 (defmethod intern-after (hsx a b
)
297 (ensuref (list 'after a b
) (registratur hsx
) (make-after a b
)))
299 (defgeneric intern-one-or-more
(handler c
))
300 (defmethod intern-one-or-more (hsx (c not-allowed
)) c
)
301 (defmethod intern-one-or-more (hsx c
)
302 (ensuref (list 'one-or-more c
) (registratur hsx
) (make-one-or-more c
)))
305 ;;;; ENSURE-REGISTRATUR
307 (defvar *seen-elements
*)
309 (defun ensure-registratur (grammar)
310 (or (schema-registratur grammar
)
311 (setf (schema-registratur grammar
)
312 (let ((table (make-hash-table :test
'equal
))
313 (*seen-elements
* '())
315 (setf (schema-interned-start grammar
)
316 (intern-pattern (schema-start grammar
) table
))
318 for elements
= *seen-elements
*
320 (setf *seen-elements
* nil
)
321 (dolist (pattern elements
)
322 (unless (find pattern done-elements
)
323 (push pattern done-elements
)
324 (setf (pattern-child pattern
)
325 (intern-pattern (pattern-child pattern
) table
)))))
328 ;;; FIXME: misnamed. we don't really intern the originals pattern yet.
330 (defgeneric intern-pattern
(pattern table
))
332 (defmethod intern-pattern ((pattern element
) table
)
333 (pushnew pattern
*seen-elements
*)
336 (defmethod intern-pattern ((pattern %parent
) table
)
337 (let ((c (intern-pattern (pattern-child pattern
) table
)))
338 (if (eq c
(pattern-child pattern
))
340 (let ((copy (copy-structure pattern
)))
341 (setf (pattern-child copy
) c
)
344 (defmethod intern-pattern ((pattern %combination
) table
)
345 (let ((a (intern-pattern (pattern-a pattern
) table
))
346 (b (intern-pattern (pattern-b pattern
) table
)))
347 (if (and (eq a
(pattern-a pattern
)) (eq b
(pattern-b pattern
)))
349 (let ((copy (copy-structure pattern
)))
350 (setf (pattern-a copy
) a
)
351 (setf (pattern-b copy
) b
)
354 (defmethod intern-pattern ((pattern data
) table
)
355 (let ((e (when (pattern-except pattern
)
356 (intern-pattern (pattern-except pattern
) table
))))
357 (if (eq e
(pattern-except pattern
))
359 (let ((copy (copy-structure pattern
)))
360 (setf (pattern-except copy
) e
)
363 (defmethod intern-pattern ((pattern ref
) table
)
364 (intern-pattern (defn-child (pattern-target pattern
)) table
))
366 (defmethod intern-pattern ((pattern empty
) table
)
369 (defmethod intern-pattern ((pattern not-allowed
) table
)
372 (defmethod intern-pattern ((pattern %leaf
) table
)
378 (defgeneric apply-after
(handler fn pattern
))
380 (defmethod apply-after (hsx fn
(pattern after
))
383 (funcall fn
(pattern-b pattern
))))
385 (defmethod apply-after (hsx fn
(pattern choice
))
387 (apply-after hsx fn
(pattern-a pattern
))
388 (apply-after hsx fn
(pattern-b pattern
))))
390 (defmethod apply-after (hsx fn
(pattern not-allowed
))
391 (declare (ignore hsx fn
))
397 (defgeneric open-start-tag
\' (handler pattern uri lname
))
399 (defmethod open-start-tag\' (hsx (pattern choice
) uri lname
)
401 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)
402 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
)))
404 (defmethod open-start-tag\' (hsx (pattern element
) uri lname
)
405 (if (contains (pattern-name pattern
) (or uri
"") lname
)
406 (intern-after hsx
(pattern-child pattern
) *empty
*)
409 (defmethod open-start-tag\' (hsx (pattern interleave
) uri lname
)
413 (lambda (p) (intern-interleave hsx p
(pattern-b pattern
)))
414 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))
417 (lambda (p) (intern-interleave hsx
(pattern-a pattern
) p
))
418 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))))
420 (defun intern-zero-or-more (hsx c
)
421 (intern-choice hsx
(intern-one-or-more hsx c
) *empty
*))
423 (defmethod open-start-tag\' (hsx (pattern one-or-more
) uri lname
)
424 (let ((c (intern-zero-or-more hsx
(pattern-child pattern
))))
426 (lambda (p) (intern-group hsx p c
))
427 (open-start-tag\' hsx
(pattern-child pattern
) uri lname
))))
429 (defmethod open-start-tag\' (hsx (pattern group
) uri lname
)
430 (let ((x (apply-after hsx
432 (intern-group hsx p
(pattern-b pattern
)))
433 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))))
434 (if (nullable (pattern-a pattern
))
437 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))
440 (defmethod open-start-tag\' (hsx (pattern after
) uri lname
)
443 (intern-after hsx p
(pattern-b pattern
)))
444 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)))
446 (defmethod open-start-tag\' (hsx pattern uri lname
)
447 (declare (ignore hsx pattern uri lname
))
453 (defun attributes\' (handler pattern attributes
)
454 (dolist (a attributes
)
455 (setf pattern
(attribute\' handler pattern a
)))
458 (defgeneric attribute
\' (handler pattern attribute
))
460 (defmethod attribute\' (hsx (pattern after
) a
)
462 (attribute\' hsx
(pattern-a pattern
) a
)
463 (pattern-b pattern
)))
465 (defmethod attribute\' (hsx (pattern choice
) a
)
467 (attribute\' hsx
(pattern-a pattern
) a
)
468 (attribute\' hsx
(pattern-b pattern
) a
)))
470 (defmethod attribute\' (hsx (pattern group
) a
)
473 (attribute\' hsx
(pattern-a pattern
) a
)
477 (attribute\' hsx
(pattern-b pattern
) a
))))
479 (defmethod attribute\' (hsx (pattern interleave
) a
)
481 (intern-interleave hsx
482 (attribute\' hsx
(pattern-a pattern
) a
)
484 (intern-interleave hsx
486 (attribute\' hsx
(pattern-b pattern
) a
))))
488 (defmethod attribute\' (hsx (pattern one-or-more
) a
)
490 (attribute\' hsx
(pattern-child pattern
) a
)
491 (intern-zero-or-more hsx
(pattern-child pattern
))))
493 (defmethod attribute\' (hsx (pattern attribute
) a
)
494 (eat (and (contains (pattern-name pattern
)
495 (or (sax:attribute-namespace-uri a
) "")
496 (sax:attribute-local-name a
))
498 (pattern-child pattern
)
499 (sax:attribute-value a
)))))
501 (defun value-matches-p (hsx pattern value
)
502 (or (and (nullable pattern
) (whitespacep value
))
503 (nullable (text\' hsx pattern value
))))
505 (defun whitespacep (str)
506 (zerop (length (string-trim *whitespace
* str
))))
508 (defmethod attribute\' (hsx pattern a
)
509 (declare (ignore hsx pattern a
))
513 ;;;; CLOSE-START-TAG'
515 (defgeneric close-start-tag
\' (handler pattern
))
517 (defmethod close-start-tag\' (hsx (pattern after
))
519 (close-start-tag\' hsx
(pattern-a pattern
))
520 (pattern-b pattern
)))
522 (defmethod close-start-tag\' (hsx (pattern choice
))
524 (close-start-tag\' hsx
(pattern-a pattern
))
525 (close-start-tag\' hsx
(pattern-b pattern
))))
527 (defmethod close-start-tag\' (hsx (pattern group
))
529 (close-start-tag\' hsx
(pattern-a pattern
))
530 (close-start-tag\' hsx
(pattern-b pattern
))))
532 (defmethod close-start-tag\' (hsx (pattern interleave
))
533 (intern-interleave hsx
534 (close-start-tag\' hsx
(pattern-a pattern
))
535 (close-start-tag\' hsx
(pattern-b pattern
))))
537 (defmethod close-start-tag\' (hsx (pattern one-or-more
))
538 (intern-one-or-more hsx
(close-start-tag\' hsx
(pattern-child pattern
))))
540 (defmethod close-start-tag\' (hsx (pattern attribute
))
541 (declare (ignore hsx
))
544 (defmethod close-start-tag\' (hsx pattern
)
545 (declare (ignore hsx
))
551 (defgeneric end-tag
\' (handler pattern
))
553 (defmethod end-tag\' (hsx (pattern choice
))
555 (end-tag\' hsx
(pattern-a pattern
))
556 (end-tag\' hsx
(pattern-b pattern
))))
558 (defmethod end-tag\' (hsx (pattern after
))
559 (if (nullable (pattern-a pattern
))
563 (defmethod end-tag\' (hsx pattern
)
564 (declare (ignore hsx pattern
))
570 ;;; FIXME: cxml should do that
572 ;;; FIXME: since we ignore PI, CDATA, and comment events, we should probably
573 ;;; discard them properly.
575 (defclass text-normalizer
(cxml:sax-proxy
)
576 ((pending-text-node :initform
(make-string-output-stream)
577 :accessor pending-text-node
)))
579 (defmethod sax:characters
((handler text-normalizer
) data
)
580 (write-string data
(pending-text-node handler
)))
582 (defun flush-pending (handler)
583 (let ((str (get-output-stream-string (pending-text-node handler
))))
584 (unless (zerop (length str
))
585 (sax:characters
(cxml:proxy-chained-handler handler
) str
))))
587 (defmethod sax:start-element
:before
588 ((handler text-normalizer
) uri lname qname attributes
)
589 (declare (ignore uri lname qname attributes
))
590 (flush-pending handler
))
592 (defmethod sax:end-element
:before
593 ((handler text-normalizer
) uri lname qname
)
594 (declare (ignore uri lname qname
))
595 (flush-pending handler
))