1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; An implementation of James Clark's algorithm for RELAX NG validation.
4 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 (in-package :cxml-rng
)
(defvar *empty* (make-empty)
  "Shared EMPTY pattern instance; DEFVAR builds it only if still unbound.")
(defvar *not-allowed* (make-not-allowed)
  "Shared NOT-ALLOWED pattern instance; DEFVAR builds it only if still unbound.")
37 (defun make-validator (grammar)
38 (let* ((table (ensure-registratur grammar
))
39 (start (parsed-grammar-interned-start grammar
))
41 (make-instance 'validator
43 :current-pattern start
)))
44 (make-instance 'text-normalizer
:chained-handler validator
)))
(defgeneric contains (nc uri lname)
  (:documentation
   "Return true if the name class NC matches the name made of namespace
URI and local name LNAME."))
51 (defmethod contains ((nc any-name
) uri lname
)
52 (let ((except (any-name-except nc
)))
54 (not (contains except uri lname
))
57 (defmethod contains ((nc ns-name
) uri lname
)
58 (and (equal (ns-name-uri nc
) uri
)
59 (let ((except (ns-name-except nc
)))
61 (not (contains except uri lname
))
(defmethod contains ((nc name) uri lname)
  "A single NAME matches only when its URI and its local name are both
EQUAL to the ones given."
  (if (equal (name-uri nc) uri)
      (equal (name-lname nc) lname)
      nil))
(defmethod contains ((nc name-class-choice) uri lname)
  "A choice of name classes matches when either alternative matches.
The second alternative is only consulted if the first one fails."
  (cond ((contains (name-class-choice-a nc) uri lname))
        (t (contains (name-class-choice-b nc) uri lname))))
(defgeneric nullable (pattern)
  (:documentation
   "Return true if PATTERN is nullable.  EMPTY and TEXT are nullable;
GROUP and INTERLEAVE need both operands to be, CHOICE needs either one,
ONE-OR-MORE follows its child; every other pattern class is not."))

;; Both operands must be nullable.
(defmethod nullable ((pattern group))
  (when (nullable (pattern-a pattern))
    (nullable (pattern-b pattern))))

(defmethod nullable ((pattern interleave))
  (when (nullable (pattern-a pattern))
    (nullable (pattern-b pattern))))

;; Either operand suffices; the second is only examined if the first fails.
(defmethod nullable ((pattern choice))
  (cond ((nullable (pattern-a pattern)))
        (t (nullable (pattern-b pattern)))))

;; A repetition is nullable exactly when the repeated pattern is.
(defmethod nullable ((pattern one-or-more))
  (let ((child (pattern-child pattern)))
    (nullable child)))

;; The remaining pattern classes have a fixed answer; generate one
;; method per class instead of spelling each of them out.
(macrolet ((define-constant-nullable (value &rest classes)
             `(progn
                ,@(loop for class in classes
                        collect `(defmethod nullable ((pattern ,class))
                                   ,value)))))
  (define-constant-nullable nil
    element attribute list-pattern value data not-allowed after)
  (define-constant-nullable t
    empty text))
(defclass validator (sax:sax-parser-mixin)
  ((current-pattern
    :accessor current-pattern
    :initarg :current-pattern
    :documentation "Pattern the next SAX event is matched against.")
   (after-start-tag-p
    :accessor after-start-tag-p
    :documentation "Set to T at the end of SAX:START-ELEMENT and reset to
NIL once character data has been seen.  Unbound until the first event.")
   (pending-text-node
    :accessor pending-text-node
    :initform nil
    :documentation "Character data received right after a start tag, kept
until the following event decides how to treat it; NIL otherwise.")
   (registratur
    :accessor registratur
    :initarg :registratur
    :documentation "Hash table consulted by the INTERN-* functions."))
  (:documentation
   "SAX handler holding the state needed to validate a document against
a pattern: the current pattern plus text-node bookkeeping."))
112 (defun advance (hsx pattern message
)
113 (when (typep pattern
'not-allowed
)
114 (rng-error hsx
"~A, was expecting a ~A"
116 (replace-scary-characters (current-pattern hsx
))))
118 (write-line (replace-scary-characters (current-pattern hsx
))))
119 (setf (current-pattern hsx
) pattern
))
121 ;; make sure slime doesn't die
122 (defun replace-scary-characters (pattern)
123 (let ((str (write-to-string pattern
130 when
(>= (char-code c
) 128)
131 do
(setf (elt str i
) #\?))
134 (defmethod sax:characters
((hsx validator
) data
)
135 (assert (null (pending-text-node hsx
))) ;parser must be normalize
136 (if (after-start-tag-p hsx
)
137 (setf (pending-text-node hsx
) data
)
138 (unless (whitespacep data
)
139 ;; we already saw an element sibling, so discard whitespace
141 (text\' hsx
(current-pattern hsx
) data
)
142 "text node not valid")))
143 (setf (after-start-tag-p hsx
) nil
))
145 (defmethod sax:start-element
((hsx validator
) uri lname qname attributes
)
146 (declare (ignore qname
))
147 (when (pending-text-node hsx
)
148 ;; text node was the previous child, and we're in element content.
149 ;; process non-whitespace now; discard whitespace completely
150 (let ((data (pending-text-node hsx
)))
151 (unless (whitespacep data
)
153 (text\' hsx
(current-pattern hsx
) data
)
155 (setf (pending-text-node hsx
) nil
))
157 (remove-if (cxml::compose
#'cxml
::xmlns-attr-p
#'sax
:attribute-qname
)
159 (let* ((p0 (current-pattern hsx
))
160 (p1 (open-start-tag\' hsx p0 uri lname
))
162 (advance hsx p1
"element not valid")
163 (attributes\' hsx p1 attributes
)))
165 (advance hsx p2
"attributes not valid")
166 (close-start-tag\' hsx p2
))))
167 (advance hsx p3
"attributes not valid")
168 (setf (after-start-tag-p hsx
) t
)))
170 (defmethod sax:end-element
((hsx validator
) uri lname qname
)
171 (declare (ignore uri lname qname
))
172 (when (after-start-tag-p hsx
)
173 ;; nothing at all? pretend we saw whitespace.
174 (sax:characters hsx
""))
175 (when (pending-text-node hsx
)
176 ;; text node was the only child?
177 ;; process it and handle whitespace specially
178 (let* ((current (current-pattern hsx
))
179 (data (pending-text-node hsx
))
180 (next (text\' hsx current data
)))
182 (if (whitespacep data
)
183 (intern-choice hsx current next
)
185 "text node not valid"))
186 (setf (pending-text-node hsx
) nil
))
188 (end-tag\' hsx
(current-pattern hsx
))
189 "end of element not valid"))
(defgeneric text\' (handler pattern data)
  (:documentation
   "Return the pattern that results from matching the string DATA against
PATTERN.  HANDLER supplies the table used to intern newly built patterns.
NOTE(review): presumably the text derivative of James Clark's algorithm;
confirm against the algorithm description."))
196 (defmethod text\' (hsx (pattern choice
) data
)
198 (text\' hsx
(pattern-a pattern
) data
)
199 (text\' hsx
(pattern-b pattern
) data
)))
201 (defmethod text\' (hsx (pattern interleave
) data
)
202 (let ((a (pattern-a pattern
))
203 (b (pattern-b pattern
)))
205 (intern-interleave hsx
(text\' hsx a data
) b
)
206 (intern-interleave hsx a
(text\' hsx b data
)))))
208 (defmethod text\' (hsx (pattern group
) data
)
209 (let* ((a (pattern-a pattern
))
210 (b (pattern-b pattern
))
211 (p (intern-group hsx
(text\' hsx a data
) b
)))
213 (intern-choice hsx p
(text\' hsx b data
))
216 (defmethod text\' (hsx (pattern after
) data
)
218 (text\' hsx
(pattern-a pattern
) data
)
219 (pattern-b pattern
)))
221 (defmethod text\' (hsx (pattern one-or-more
) data
)
222 (let ((child (pattern-child pattern
)))
224 (text\' hsx child data
)
225 (intern-zero-or-more hsx child
))))
227 (defmethod text\' (hsx (pattern text
) data
)
228 (declare (ignore data
))
232 (if ok
*empty
* *not-allowed
*))
234 (defmethod text\' (hsx (pattern value
) data
)
235 (eat (equal* (pattern-datatype-library pattern
)
236 (pattern-type pattern
)
237 (pattern-string pattern
)
240 (defmethod text\' (hsx (pattern data
) data
)
241 (eat (and (typep* (pattern-datatype-library pattern
)
242 (pattern-type pattern
)
244 (let ((except (pattern-except pattern
)))
245 (not (and except
(nullable (text\' hsx except data
))))))))
(defmethod text\' (hsx (pattern list-pattern) data)
  "Split DATA with WORDS, run the pieces through LIST' against the child
pattern, and hand the nullability of what is left over to EAT."
  (let* ((child (pattern-child pattern))
         (tokens (words data))
         (remainder (list\' hsx child tokens)))
    (eat (nullable remainder))))
250 (defmethod text\' (hsx pattern data
)
251 (declare (ignore pattern data
))
254 (defun list\' (hsx pattern words
)
256 (setf pattern
(text\' hsx pattern word
)))
260 (cl-ppcre:split
#.
(format nil
"[~A]+" *whitespace
*) str
))
(defmacro ensuref (key table value)
  "Expand into an ENSURE-HASH call for KEY in TABLE.  VALUE is wrapped in
a closure, so it is only evaluated if ENSURE-HASH decides to call it."
  (let ((thunk `(lambda () ,value)))
    `(ensure-hash ,key ,table ,thunk)))
(defun ensure-hash (key table fn)
  "Return the value stored under KEY in the hash table TABLE.
If there is no entry for KEY yet, call the zero-argument function FN,
store its result under KEY and return it.

Presence is decided by the second return value of GETHASH rather than by
the truth of the stored value, so an entry whose value is NIL is found
again instead of being recomputed on every lookup."
  (multiple-value-bind (value presentp) (gethash key table)
    (if presentp
        value
        (setf (gethash key table) (funcall fn)))))
(defgeneric intern-choice (handler a b)
  (:documentation
   "Return a pattern for the choice between A and B.  A NOT-ALLOWED
operand is dropped in favour of the other one; otherwise the CHOICE
pattern is looked up in, or added to, HANDLER's registratur."))

;; A not-allowed alternative contributes nothing: keep the other side.
(defmethod intern-choice (hsx a (b not-allowed))
  a)

(defmethod intern-choice (hsx (a not-allowed) b)
  b)

;; General case: one shared CHOICE object per (A B) pair.
(defmethod intern-choice (hsx a b)
  (let ((key (list 'choice a b))
        (table (registratur hsx)))
    (ensure-hash key table (lambda () (make-choice a b)))))
(defgeneric intern-group (handler a b)
  (:documentation
   "Return a pattern for the group of A followed by B.  A NOT-ALLOWED
operand makes the result that NOT-ALLOWED operand, an EMPTY operand is
dropped; otherwise the GROUP pattern is looked up in, or added to,
HANDLER's registratur."))

;; Not-allowed on either side is returned as the result.
(defmethod intern-group (hsx (a pattern) (b not-allowed))
  b)

(defmethod intern-group (hsx (a not-allowed) (b pattern))
  a)

;; Empty on either side is dropped.
(defmethod intern-group (hsx a (b empty))
  a)

(defmethod intern-group (hsx (a empty) b)
  b)

;; General case: one shared GROUP object per (A B) pair.
(defmethod intern-group (hsx a b)
  (let ((key (list 'group a b))
        (table (registratur hsx)))
    (ensure-hash key table (lambda () (make-group a b)))))
(defgeneric intern-interleave (handler a b)
  (:documentation
   "Return a pattern for the interleaving of A and B.  A NOT-ALLOWED
operand makes the result that NOT-ALLOWED operand, an EMPTY operand is
dropped; otherwise the INTERLEAVE pattern is looked up in, or added to,
HANDLER's registratur."))

;; Not-allowed on either side is returned as the result.
(defmethod intern-interleave (hsx (a pattern) (b not-allowed))
  b)

(defmethod intern-interleave (hsx (a not-allowed) (b pattern))
  a)

;; Empty on either side is dropped.
(defmethod intern-interleave (hsx a (b empty))
  a)

(defmethod intern-interleave (hsx (a empty) b)
  b)

;; General case: one shared INTERLEAVE object per (A B) pair.
(defmethod intern-interleave (hsx a b)
  (let ((key (list 'interleave a b))
        (table (registratur hsx)))
    (ensure-hash key table (lambda () (make-interleave a b)))))
(defgeneric intern-after (handler a b)
  (:documentation
   "Return an AFTER pattern built from A and B.  A NOT-ALLOWED operand
makes the result that NOT-ALLOWED operand; otherwise the AFTER pattern is
looked up in, or added to, HANDLER's registratur."))

;; Not-allowed on either side is returned as the result.
(defmethod intern-after (hsx (a pattern) (b not-allowed))
  b)

(defmethod intern-after (hsx (a not-allowed) (b pattern))
  a)

;; General case: one shared AFTER object per (A B) pair.
(defmethod intern-after (hsx a b)
  (let ((key (list 'after a b))
        (table (registratur hsx)))
    (ensure-hash key table (lambda () (make-after a b)))))
(defgeneric intern-one-or-more (handler c)
  (:documentation
   "Return a ONE-OR-MORE pattern around C.  A NOT-ALLOWED child is
returned unchanged; otherwise the pattern is looked up in, or added to,
HANDLER's registratur."))

;; Repeating not-allowed is still not-allowed.
(defmethod intern-one-or-more (hsx (c not-allowed))
  c)

;; General case: one shared ONE-OR-MORE object per child.
(defmethod intern-one-or-more (hsx c)
  (let ((key (list 'one-or-more c))
        (table (registratur hsx)))
    (ensure-hash key table (lambda () (make-one-or-more c)))))
306 ;;;; ENSURE-REGISTRATUR
;; List of ELEMENT patterns met while interning.  Deliberately left
;; unbound at top level: ENSURE-REGISTRATUR binds it around its work and
;; INTERN-PATTERN pushes onto it.
(defvar *seen-elements*)
310 (defun ensure-registratur (grammar)
311 (or (parsed-grammar-registratur grammar
)
312 (setf (parsed-grammar-registratur grammar
)
313 (let ((table (make-hash-table :test
'equal
))
314 (*seen-elements
* '())
316 (setf (parsed-grammar-interned-start grammar
)
317 (intern-pattern (parsed-grammar-pattern grammar
) table
))
319 for elements
= *seen-elements
*
321 (setf *seen-elements
* nil
)
322 (dolist (pattern elements
)
323 (unless (find pattern done-elements
)
324 (push pattern done-elements
)
325 (setf (pattern-child pattern
)
326 (intern-pattern (pattern-child pattern
) table
)))))
;;; FIXME: misnamed.  We don't really intern the original patterns yet.
(defgeneric intern-pattern (pattern table)
  (:documentation
   "Return the version of PATTERN to use during validation, processing
its sub-patterns recursively.  TABLE is the hash table created by
ENSURE-REGISTRATUR.
NOTE(review): per the FIXME in this file the original patterns are not
really interned yet; confirm what TABLE is used for."))
333 (defmethod intern-pattern ((pattern element
) table
)
334 (pushnew pattern
*seen-elements
*)
337 (defmethod intern-pattern ((pattern %parent
) table
)
338 (let ((c (intern-pattern (pattern-child pattern
) table
)))
339 (if (eq c
(pattern-child pattern
))
341 (let ((copy (copy-structure pattern
)))
342 (setf (pattern-child copy
) c
)
345 (defmethod intern-pattern ((pattern %combination
) table
)
346 (let ((a (intern-pattern (pattern-a pattern
) table
))
347 (b (intern-pattern (pattern-b pattern
) table
)))
348 (if (and (eq a
(pattern-a pattern
)) (eq b
(pattern-b pattern
)))
350 (let ((copy (copy-structure pattern
)))
351 (setf (pattern-a copy
) a
)
352 (setf (pattern-b copy
) b
)
355 (defmethod intern-pattern ((pattern data
) table
)
356 (let ((e (when (pattern-except pattern
)
357 (intern-pattern (pattern-except pattern
) table
))))
358 (if (eq e
(pattern-except pattern
))
360 (let ((copy (copy-structure pattern
)))
361 (setf (pattern-except copy
) e
)
(defmethod intern-pattern ((pattern ref) table)
  "A reference is replaced by the interned child of the definition it
points to."
  (let* ((definition (pattern-target pattern))
         (body (defn-child definition)))
    (intern-pattern body table)))
367 (defmethod intern-pattern ((pattern empty
) table
)
370 (defmethod intern-pattern ((pattern not-allowed
) table
)
373 (defmethod intern-pattern ((pattern %leaf
) table
)
377 ;;;; built-in data type library
(defun equal* (dl type a b)
  "Compare the strings A and B as values of the built-in data type named
by the string TYPE.  DL is the data type library name and must be the
empty string; anything else signals an error.

\"string\" compares with EQUAL, \"token\" compares after whitespace
normalization.  Any other TYPE makes ECASE signal an error."
  (when (not (equal dl ""))
    (error "data type library not found: ~A" dl))
  (let ((type-keyword (find-symbol type :keyword)))
    (ecase type-keyword
      (:|string|
       (equal a b))
      (:|token|
       (let ((normalized-a (normalize-whitespace a))
             (normalized-b (normalize-whitespace b)))
         (equal normalized-a normalized-b))))))
(defun typep* (dl type str)
  "Return T if STR is a valid value of the built-in data type named by
the string TYPE.  DL is the data type library name and must be the empty
string; anything else signals an error.

Both built-in types, \"string\" and \"token\", accept every string, so STR
is not inspected.  Any other TYPE makes ECASE signal an error."
  (declare (ignore str))
  (when (not (equal dl ""))
    (error "data type library not found: ~A" dl))
  (let ((type-keyword (find-symbol type :keyword)))
    (ecase type-keyword
      (:|string| t)
      (:|token| t))))
395 (defun normalize-whitespace (str)
396 (cl-ppcre:regex-replace-all
#.
(format nil
"[~A]+" *whitespace
*)
397 (string-trim *whitespace
* str
)
(defgeneric apply-after (handler fn pattern)
  (:documentation
   "Rebuild PATTERN with the one-argument function FN applied inside it.
The methods recurse through both branches of a CHOICE and call FN on the
second operand of an AFTER pattern.
NOTE(review): description derived from the visible method fragments
only; confirm against the complete methods."))
405 (defmethod apply-after (hsx fn
(pattern after
))
408 (funcall fn
(pattern-b pattern
))))
410 (defmethod apply-after (hsx fn
(pattern choice
))
412 (apply-after hsx fn
(pattern-a pattern
))
413 (apply-after hsx fn
(pattern-b pattern
))))
415 (defmethod apply-after (hsx fn
(pattern not-allowed
))
416 (declare (ignore hsx fn
))
(defgeneric open-start-tag\' (handler pattern uri lname)
  (:documentation
   "Return the pattern that results from matching the start of an element
named by namespace URI and local name LNAME against PATTERN.  HANDLER
supplies the table used to intern newly built patterns."))
424 (defmethod open-start-tag\' (hsx (pattern choice
) uri lname
)
426 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)
427 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
)))
429 (defmethod open-start-tag\' (hsx (pattern element
) uri lname
)
430 (if (contains (pattern-name pattern
) (or uri
"") lname
)
431 (intern-after hsx
(pattern-child pattern
) *empty
*)
434 (defmethod open-start-tag\' (hsx (pattern interleave
) uri lname
)
438 (lambda (p) (intern-interleave hsx p
(pattern-b pattern
)))
439 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))
442 (lambda (p) (intern-interleave hsx
(pattern-a pattern
) p
))
443 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))))
(defun intern-zero-or-more (hsx c)
  "Return an interned pattern for zero or more repetitions of C,
expressed as the choice between one-or-more C and *EMPTY*."
  (let ((repeated (intern-one-or-more hsx c)))
    (intern-choice hsx repeated *empty*)))
448 (defmethod open-start-tag\' (hsx (pattern one-or-more
) uri lname
)
449 (let ((c (intern-zero-or-more hsx
(pattern-child pattern
))))
451 (lambda (p) (intern-group hsx p c
))
452 (open-start-tag\' hsx
(pattern-child pattern
) uri lname
))))
454 (defmethod open-start-tag\' (hsx (pattern group
) uri lname
)
455 (let ((x (apply-after hsx
457 (intern-group hsx p
(pattern-b pattern
)))
458 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))))
459 (if (nullable (pattern-a pattern
))
462 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))
465 (defmethod open-start-tag\' (hsx (pattern after
) uri lname
)
468 (intern-after hsx p
(pattern-b pattern
)))
469 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)))
471 (defmethod open-start-tag\' (hsx pattern uri lname
)
472 (declare (ignore hsx pattern uri lname
))
478 (defun attributes\' (handler pattern attributes
)
479 (dolist (a attributes
)
480 (setf pattern
(attribute\' handler pattern a
)))
(defgeneric attribute\' (handler pattern attribute)
  (:documentation
   "Return the pattern that results from matching the single SAX
attribute ATTRIBUTE against PATTERN.  HANDLER supplies the table used to
intern newly built patterns."))
485 (defmethod attribute\' (hsx (pattern after
) a
)
487 (attribute\' hsx
(pattern-a pattern
) a
)
488 (pattern-b pattern
)))
490 (defmethod attribute\' (hsx (pattern choice
) a
)
492 (attribute\' hsx
(pattern-a pattern
) a
)
493 (attribute\' hsx
(pattern-b pattern
) a
)))
495 (defmethod attribute\' (hsx (pattern group
) a
)
498 (attribute\' hsx
(pattern-a pattern
) a
)
502 (attribute\' hsx
(pattern-b pattern
) a
))))
504 (defmethod attribute\' (hsx (pattern interleave
) a
)
506 (intern-interleave hsx
507 (attribute\' hsx
(pattern-a pattern
) a
)
509 (intern-interleave hsx
511 (attribute\' hsx
(pattern-b pattern
) a
))))
513 (defmethod attribute\' (hsx (pattern one-or-more
) a
)
515 (attribute\' hsx
(pattern-child pattern
) a
)
516 (intern-zero-or-more hsx
(pattern-child pattern
))))
518 (defmethod attribute\' (hsx (pattern attribute
) a
)
519 (eat (and (contains (pattern-name pattern
)
520 (or (sax:attribute-namespace-uri a
) "")
521 (sax:attribute-local-name a
))
523 (pattern-child pattern
)
524 (sax:attribute-value a
)))))
(defun value-matches-p (hsx pattern value)
  "Return true if the string VALUE is acceptable for PATTERN: either
PATTERN is nullable and VALUE is only whitespace, or what remains of
PATTERN after matching VALUE as text is nullable."
  (cond ((and (nullable pattern)
              (whitespacep value)))
        (t
         (nullable (text\' hsx pattern value)))))
(defun whitespacep (str)
  "Return T if nothing is left of STR once the characters in *WHITESPACE*
have been trimmed from both ends; the empty string counts as whitespace."
  (let ((trimmed (string-trim *whitespace* str)))
    (= 0 (length trimmed))))
533 (defmethod attribute\' (hsx pattern a
)
534 (declare (ignore hsx pattern a
))
538 ;;;; CLOSE-START-TAG'
(defgeneric close-start-tag\' (handler pattern)
  (:documentation
   "Return the pattern to continue with once the attributes of a start
tag have been processed.  SAX:START-ELEMENT calls it after ATTRIBUTES'.
NOTE(review): presumably ATTRIBUTE patterns still pending at this point
become not-allowed; confirm against the ATTRIBUTE method."))
542 (defmethod close-start-tag\' (hsx (pattern after
))
544 (close-start-tag\' hsx
(pattern-a pattern
))
545 (pattern-b pattern
)))
547 (defmethod close-start-tag\' (hsx (pattern choice
))
549 (close-start-tag\' hsx
(pattern-a pattern
))
550 (close-start-tag\' hsx
(pattern-b pattern
))))
552 (defmethod close-start-tag\' (hsx (pattern group
))
554 (close-start-tag\' hsx
(pattern-a pattern
))
555 (close-start-tag\' hsx
(pattern-b pattern
))))
(defmethod close-start-tag\' (hsx (pattern interleave))
  "Close the start tag in both operands and interleave the results."
  (let* ((closed-a (close-start-tag\' hsx (pattern-a pattern)))
         (closed-b (close-start-tag\' hsx (pattern-b pattern))))
    (intern-interleave hsx closed-a closed-b)))
(defmethod close-start-tag\' (hsx (pattern one-or-more))
  "Close the start tag in the repeated child and repeat the result."
  (let ((closed-child (close-start-tag\' hsx (pattern-child pattern))))
    (intern-one-or-more hsx closed-child)))
565 (defmethod close-start-tag\' (hsx (pattern attribute
))
566 (declare (ignore hsx
))
569 (defmethod close-start-tag\' (hsx pattern
)
570 (declare (ignore hsx
))
(defgeneric end-tag\' (handler pattern)
  (:documentation
   "Return the pattern to continue with after the end tag of the current
element.  SAX:END-ELEMENT passes the result to ADVANCE, which reports
\"end of element not valid\" if it is NOT-ALLOWED."))
578 (defmethod end-tag\' (hsx (pattern choice
))
580 (end-tag\' hsx
(pattern-a pattern
))
581 (end-tag\' hsx
(pattern-b pattern
))))
583 (defmethod end-tag\' (hsx (pattern after
))
584 (if (nullable (pattern-a pattern
))
588 (defmethod end-tag\' (hsx pattern
)
589 (declare (ignore hsx pattern
))
595 ;;; FIXME: cxml should do that
597 ;;; FIXME: since we ignore PI, CDATA, and comment events, we should probably
598 ;;; discard them properly.
(defclass text-normalizer (cxml:sax-proxy)
  ((pending-text-node
    :accessor pending-text-node
    :initform (make-string-output-stream)
    :documentation "String output stream collecting character data that
has not been passed on to the chained handler yet."))
  (:documentation
   "SAX proxy that buffers SAX:CHARACTERS events and forwards the
accumulated text as a single event right before the next start-element
or end-element event."))
(defmethod sax:characters ((handler text-normalizer) data)
  "Append DATA to the pending text buffer instead of forwarding it."
  (let ((buffer (pending-text-node handler)))
    (write-string data buffer)))
(defun flush-pending (handler)
  "Drain HANDLER's pending text buffer and, if it held any characters,
send them to the chained handler as one SAX:CHARACTERS event."
  (let* ((buffer (pending-text-node handler))
         (text (get-output-stream-string buffer)))
    (when (plusp (length text))
      (sax:characters (cxml:proxy-chained-handler handler) text))))
(defmethod sax:start-element :before
    ((handler text-normalizer) uri lname qname attributes)
  "Emit any buffered text before the start-element event goes through."
  (declare (ignore uri lname qname attributes))
  (flush-pending handler))
(defmethod sax:end-element :before
    ((handler text-normalizer) uri lname qname)
  "Emit any buffered text before the end-element event goes through."
  (declare (ignore uri lname qname))
  (flush-pending handler))