1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; An implementation of James Clark's algorithm for RELAX NG validation.
4 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 (in-package :cxml-rng
)
33 (defvar *empty
* (make-empty))
34 (defvar *not-allowed
* (make-not-allowed))
37 (defun make-validator (grammar)
38 (make-instance 'text-normalizer
40 (make-instance 'validator
:current-pattern grammar
)))
45 (defgeneric contains
(nc uri lname
))
47 (defmethod contains ((nc any-name
) uri lname
)
48 (let ((except (any-name-except nc
)))
50 (not (contains except uri lname
))
53 (defmethod contains ((nc ns-name
) uri lname
)
54 (and (equal (ns-name-uri nc
) uri
)
55 (let ((except (ns-name-except nc
)))
57 (not (contains except uri lname
))
60 (defmethod contains ((nc name
) uri lname
)
61 (and (equal (name-uri nc
) uri
)
62 (equal (name-lname nc
) lname
)))
64 (defmethod contains ((nc name-class-choice
) uri lname
)
65 (or (contains (name-class-choice-a nc
) uri lname
)
66 (contains (name-class-choice-b nc
) uri lname
)))
71 (defgeneric nullable
(pattern))
73 (defmethod nullable ((pattern group
))
74 (and (nullable (pattern-a pattern
))
75 (nullable (pattern-b pattern
))))
77 (defmethod nullable ((pattern interleave
))
78 (and (nullable (pattern-a pattern
))
79 (nullable (pattern-b pattern
))))
81 (defmethod nullable ((pattern choice
))
82 (or (nullable (pattern-a pattern
))
83 (nullable (pattern-b pattern
))))
85 (defmethod nullable ((pattern one-or-more
))
86 (nullable (pattern-child pattern
)))
88 (defmethod nullable ((pattern element
)) nil
)
89 (defmethod nullable ((pattern attribute
)) nil
)
90 (defmethod nullable ((pattern list-pattern
)) nil
)
91 (defmethod nullable ((pattern value
)) nil
)
92 (defmethod nullable ((pattern data
)) nil
)
93 (defmethod nullable ((pattern not-allowed
)) nil
)
94 (defmethod nullable ((pattern after
)) nil
)
96 (defmethod nullable ((pattern empty
)) t
)
97 (defmethod nullable ((pattern text
)) t
)
102 (defclass validator
(sax:sax-parser-mixin
)
103 ((current-pattern :initarg
:current-pattern
:accessor current-pattern
)
104 (after-start-tag-p :accessor after-start-tag-p
)
105 (pending-text-node :initform nil
:accessor pending-text-node
)
106 (registratur :initform
(make-hash-table :test
'equal
) :reader registratur
)))
108 (defun advance (hsx pattern message
)
109 (when (typep pattern
'not-allowed
)
110 (rng-error hsx
"~A, was expecting a ~A"
112 (replace-scary-characters (current-pattern hsx
))))
114 (write-line (replace-scary-characters (current-pattern hsx
))))
115 (setf (current-pattern hsx
) pattern
))
117 ;; make sure slime doesn't die
118 (defun replace-scary-characters (pattern)
119 (let ((str (write-to-string pattern
126 when
(>= (char-code c
) 128)
127 do
(setf (elt str i
) #\?))
130 (defmethod sax:characters
((hsx validator
) data
)
131 (assert (null (pending-text-node hsx
))) ;parser must be normalize
132 (if (after-start-tag-p hsx
)
133 (setf (pending-text-node hsx
) data
)
134 (unless (whitespacep data
)
135 ;; we already saw an element sibling, so discard whitespace
137 (text\' hsx
(current-pattern hsx
) data
)
138 "text node not valid")))
139 (setf (after-start-tag-p hsx
) nil
))
141 (defmethod sax:start-element
((hsx validator
) uri lname qname attributes
)
142 (declare (ignore qname
))
143 (when (pending-text-node hsx
)
144 ;; text node was the previous child, and we're in element content.
145 ;; process non-whitespace now; discard whitespace completely
146 (let ((data (pending-text-node hsx
)))
147 (unless (whitespacep data
)
148 (setf (current-pattern hsx
)
149 (text\' hsx
(current-pattern hsx
) data
))
150 (check-allowed hsx
"text node")))
151 (setf (pending-text-node hsx
) nil
))
152 (let* ((p0 (current-pattern hsx
))
153 (p1 (open-start-tag\' hsx p0 uri lname
))
155 (advance hsx p1
"element not valid")
156 (attributes\' hsx p1 attributes
)))
158 (advance hsx p2
"attributes not valid")
159 (close-start-tag\' hsx p2
))))
160 (advance hsx p3
"attributes not valid")
161 (setf (after-start-tag-p hsx
) t
)))
163 (defmethod sax:end-element
((hsx validator
) uri lname qname
)
164 (declare (ignore uri lname qname
))
165 (when (after-start-tag-p hsx
)
166 ;; nothing at all? pretend we saw whitespace.
167 (sax:characters hsx
""))
168 (when (pending-text-node hsx
)
169 ;; text node was the only child?
170 ;; process it and handle whitespace specially
171 (let* ((current (current-pattern hsx
))
172 (data (pending-text-node hsx
))
173 (next (text\' hsx current data
)))
175 (if (whitespacep data
)
176 (intern-choice hsx current next
)
178 "text node not valid"))
179 (setf (pending-text-node hsx
) nil
))
181 (end-tag\' hsx
(current-pattern hsx
))
182 "end of element not valid"))
187 (defgeneric text
\' (handler pattern data
))
189 (defmethod text\' (hsx (pattern choice
) data
)
191 (text\' hsx
(pattern-a pattern
) data
)
192 (text\' hsx
(pattern-b pattern
) data
)))
194 (defmethod text\' (hsx (pattern interleave
) data
)
195 (let ((a (pattern-a pattern
))
196 (b (pattern-b pattern
)))
198 (intern-interleave hsx
(text\' hsx a data
) b
)
199 (intern-interleave hsx a
(text\' hsx b data
)))))
201 (defmethod text\' (hsx (pattern group
) data
)
202 (let* ((a (pattern-a pattern
))
203 (b (pattern-b pattern
))
204 (p (intern-group hsx
(text\' hsx a data
) b
)))
206 (intern-choice hsx p
(text\' hsx b data
))
209 (defmethod text\' (hsx (pattern after
) data
)
211 (text\' hsx
(pattern-a pattern
) data
)
212 (pattern-b pattern
)))
214 (defmethod text\' (hsx (pattern one-or-more
) data
)
215 (let ((child (pattern-child pattern
)))
217 (text\' hsx child data
)
218 (intern-zero-or-more hsx child
))))
220 (defmethod text\' (hsx (pattern text
) data
)
221 (declare (ignore data
))
225 (if ok
*empty
* *not-allowed
*))
227 (defmethod text\' (hsx (pattern value
) data
)
228 (eat (equal* (pattern-datatype-library pattern
)
229 (pattern-type pattern
)
230 (pattern-string pattern
)
233 (defmethod text\' (hsx (pattern data
) data
)
234 (eat (and (typep* (pattern-datatype-library pattern
)
235 (pattern-type pattern
)
237 (let ((except (pattern-except pattern
)))
238 (not (and except
(nullable (text\' hsx except data
))))))))
240 (defmethod text\' (hsx (pattern list-pattern
) data
)
241 (eat (nullable (list\' hsx
(pattern-child pattern
) (words data
)))))
243 (defmethod text\' (hsx pattern data
)
244 (declare (ignore pattern data
))
247 (defun list\' (hsx pattern words
)
249 (setf pattern
(text\' hsx pattern word
)))
253 (cl-ppcre:split
#.
(format nil
"[~A]+" *whitespace
*) str
))
258 (defmacro ensuref
(key table value
)
259 `(ensure-hash ,key
,table
(lambda () ,value
)))
261 (defun ensure-hash (key table fn
)
262 (or (gethash key table
)
263 (setf (gethash key table
) (funcall fn
))))
265 (defgeneric intern-choice
(handler a b
))
266 (defmethod intern-choice (hsx a
(b not-allowed
)) a
)
267 (defmethod intern-choice (hsx (a not-allowed
) b
) b
)
268 (defmethod intern-choice (hsx a b
)
269 (ensuref (list :choice a b
) (registratur hsx
) (make-choice a b
)))
271 (defgeneric intern-group
(handler a b
))
272 (defmethod intern-group (hsx (a pattern
) (b not-allowed
)) b
)
273 (defmethod intern-group (hsx (a not-allowed
) (b pattern
)) a
)
274 (defmethod intern-group (hsx a
(b empty
)) a
)
275 (defmethod intern-group (hsx (a empty
) b
) b
)
276 (defmethod intern-group (hsx a b
)
277 (ensuref (list :group a b
) (registratur hsx
) (make-group a b
)))
279 (defgeneric intern-interleave
(handler a b
))
280 (defmethod intern-interleave (hsx (a pattern
) (b not-allowed
)) b
)
281 (defmethod intern-interleave (hsx (a not-allowed
) (b pattern
)) a
)
282 (defmethod intern-interleave (hsx a
(b empty
)) a
)
283 (defmethod intern-interleave (hsx (a empty
) b
) b
)
284 (defmethod intern-interleave (hsx a b
)
285 (ensuref (list :interleave a b
) (registratur hsx
) (make-interleave a b
)))
287 (defgeneric intern-after
(handler a b
))
288 (defmethod intern-after (hsx (a pattern
) (b not-allowed
)) b
)
289 (defmethod intern-after (hsx (a not-allowed
) (b pattern
)) a
)
290 (defmethod intern-after (hsx a b
)
291 (ensuref (list :after a b
) (registratur hsx
) (make-after a b
)))
293 (defgeneric intern-one-or-more
(handler c
))
294 (defmethod intern-one-or-more (hsx (c not-allowed
)) c
)
295 (defmethod intern-one-or-more (hsx c
)
296 (ensuref (list :one-or-more c
) (registratur hsx
) (make-one-or-more c
)))
299 ;;;; built-in data type library
303 (defun equal* (dl type a b
)
304 (unless (equal dl
"")
305 (error "data type library not found: ~A" dl
))
306 (ecase (find-symbol type
:keyword
)
307 (:|string|
(equal a b
))
308 (:|token|
(equal (normalize-whitespace a
) (normalize-whitespace b
)))))
310 (defun typep* (dl type str
)
311 (declare (ignore str
))
312 (unless (equal dl
"")
313 (error "data type library not found: ~A" dl
))
314 (ecase (find-symbol type
:keyword
)
315 ((:|string|
:|token|
) t
)))
317 (defun normalize-whitespace (str)
318 (cl-ppcre:regex-replace-all
#.
(format nil
"[~A]+" *whitespace
*)
319 (string-trim *whitespace
* str
)
325 (defgeneric apply-after
(handler fn pattern
))
327 (defmethod apply-after (hsx fn
(pattern after
))
330 (funcall fn
(pattern-b pattern
))))
332 (defmethod apply-after (hsx fn
(pattern choice
))
334 (apply-after hsx fn
(pattern-a pattern
))
335 (apply-after hsx fn
(pattern-b pattern
))))
337 (defmethod apply-after (hsx fn
(pattern not-allowed
))
338 (declare (ignore hsx fn
))
344 (defgeneric open-start-tag
\' (handler pattern uri lname
))
346 (defmethod open-start-tag\' (hsx (pattern choice
) uri lname
)
348 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)
349 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
)))
351 (defmethod open-start-tag\' (hsx (pattern element
) uri lname
)
352 (if (contains (pattern-name pattern
) (or uri
"") lname
)
353 (intern-after hsx
(pattern-child pattern
) *empty
*)
356 (defmethod open-start-tag\' (hsx (pattern interleave
) uri lname
)
360 (lambda (p) (intern-interleave hsx p
(pattern-b pattern
)))
361 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))
364 (lambda (p) (intern-interleave hsx
(pattern-a pattern
) p
))
365 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))))
367 (defun intern-zero-or-more (hsx c
)
368 (intern-choice hsx
(intern-one-or-more hsx c
) *empty
*))
370 (defmethod open-start-tag\' (hsx (pattern one-or-more
) uri lname
)
371 (let ((c (intern-zero-or-more hsx
(pattern-child pattern
))))
373 (lambda (p) (intern-group hsx p c
))
374 (open-start-tag\' hsx
(pattern-child pattern
) uri lname
))))
376 (defmethod open-start-tag\' (hsx (pattern group
) uri lname
)
377 (let ((x (apply-after hsx
379 (intern-group hsx p
(pattern-b pattern
)))
380 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
))))
381 (if (nullable (pattern-a pattern
))
384 (open-start-tag\' hsx
(pattern-b pattern
) uri lname
))
387 (defmethod open-start-tag\' (hsx (pattern after
) uri lname
)
390 (intern-after hsx p
(pattern-b pattern
)))
391 (open-start-tag\' hsx
(pattern-a pattern
) uri lname
)))
393 (defmethod open-start-tag\' (hsx pattern uri lname
)
394 (declare (ignore hsx pattern uri lname
))
400 (defun attributes\' (handler pattern attributes
)
401 (dolist (a attributes
)
402 (setf pattern
(attribute\' handler pattern a
)))
405 (defgeneric attribute
\' (handler pattern attribute
))
407 (defmethod attribute\' (hsx (pattern after
) a
)
409 (attribute\' hsx
(pattern-a pattern
) a
)
410 (pattern-b pattern
)))
412 (defmethod attribute\' (hsx (pattern choice
) a
)
414 (attribute\' hsx
(pattern-a pattern
) a
)
415 (attribute\' hsx
(pattern-b pattern
) a
)))
417 (defmethod attribute\' (hsx (pattern group
) a
)
420 (attribute\' hsx
(pattern-a pattern
) a
)
424 (attribute\' hsx
(pattern-b pattern
) a
))))
426 (defmethod attribute\' (hsx (pattern interleave
) a
)
428 (intern-interleave hsx
429 (attribute\' hsx
(pattern-a pattern
) a
)
431 (intern-interleave hsx
433 (attribute\' hsx
(pattern-b pattern
) a
))))
435 (defmethod attribute\' (hsx (pattern one-or-more
) a
)
437 (attribute\' hsx
(pattern-child pattern
) a
)
438 (intern-zero-or-more hsx
(pattern-child pattern
))))
440 (defmethod attribute\' (hsx (pattern attribute
) a
)
441 (eat (and (contains (pattern-name pattern
)
442 (or (sax:attribute-namespace-uri a
) "")
443 (sax:attribute-local-name a
))
445 (pattern-child pattern
)
446 (sax:attribute-value a
)))))
448 (defun value-matches-p (hsx pattern value
)
449 (or (and (nullable pattern
) (whitespacep value
))
450 (nullable (text\' hsx pattern value
))))
452 (defun whitespacep (str)
453 (zerop (length (string-trim *whitespace
* str
))))
455 (defmethod attribute\' (hsx pattern a
)
456 (declare (ignore hsx pattern a
))
460 ;;;; CLOSE-START-TAG'
462 (defgeneric close-start-tag
\' (handler pattern
))
464 (defmethod close-start-tag\' (hsx (pattern after
))
466 (close-start-tag\' hsx
(pattern-a pattern
))
467 (pattern-b pattern
)))
469 (defmethod close-start-tag\' (hsx (pattern choice
))
471 (close-start-tag\' hsx
(pattern-a pattern
))
472 (close-start-tag\' hsx
(pattern-b pattern
))))
474 (defmethod close-start-tag\' (hsx (pattern group
))
476 (close-start-tag\' hsx
(pattern-a pattern
))
477 (close-start-tag\' hsx
(pattern-b pattern
))))
479 (defmethod close-start-tag\' (hsx (pattern interleave
))
480 (intern-interleave hsx
481 (close-start-tag\' hsx
(pattern-a pattern
))
482 (close-start-tag\' hsx
(pattern-b pattern
))))
484 (defmethod close-start-tag\' (hsx (pattern one-or-more
))
485 (intern-one-or-more hsx
(close-start-tag\' hsx
(pattern-child pattern
))))
487 (defmethod close-start-tag\' (hsx (pattern attribute
))
488 (declare (ignore hsx
))
491 (defmethod close-start-tag\' (hsx pattern
)
492 (declare (ignore hsx
))
498 (defgeneric end-tag
\' (handler pattern
))
500 (defmethod end-tag\' (hsx (pattern choice
))
502 (end-tag\' hsx
(pattern-a pattern
))
503 (end-tag\' hsx
(pattern-b pattern
))))
505 (defmethod end-tag\' (hsx (pattern after
))
506 (if (nullable (pattern-a pattern
))
510 (defmethod end-tag\' (hsx pattern
)
511 (declare (ignore hsx pattern
))
517 ;;; FIXME: cxml should do that
519 ;;; FIXME: since we ignore PI, CDATA, and comment events, we should probably
520 ;;; discard them properly.
522 (defclass text-normalizer
(cxml:sax-proxy
)
523 ((pending-text-node :initform
(make-string-output-stream)
524 :accessor pending-text-node
)))
526 (defmethod sax:characters
((handler text-normalizer
) data
)
527 (write-string data
(pending-text-node handler
)))
529 (defun flush-pending (handler)
530 (let ((str (get-output-stream-string (pending-text-node handler
))))
531 (unless (zerop (length str
))
532 (sax:characters
(cxml:proxy-chained-handler handler
) str
))))
534 (defmethod sax:start-element
:before
535 ((handler text-normalizer
) uri lname qname attributes
)
536 (declare (ignore uri lname qname attributes
))
537 (flush-pending handler
))
539 (defmethod sax:end-element
:before
540 ((handler text-normalizer
) uri lname qname
)
541 (declare (ignore uri lname qname
))
542 (flush-pending handler
))