ns-handling (4.8 - 4.10)
[cxml-rng.git] / parse.lisp
blobaab90e8407ea3de3e131195131b9d1670edecb6a
1 (in-package :cxml-rng)
3 #+sbcl
4 (declaim (optimize (debug 2)))
7 ;;;; Errors
9 (define-condition rng-error (simple-error) ())
11 (defun rng-error (source fmt &rest args)
12 (let ((s (make-string-output-stream)))
13 (apply #'format s fmt args)
14 (when source
15 (format s "~& [ Error at line ~D, column ~D in ~S ]"
16 (klacks:current-line-number source)
17 (klacks:current-column-number source)
18 (klacks:current-system-id source)))
19 (error 'rng-error
20 :format-control "~A"
21 :format-arguments (list (get-output-stream-string s)))))
24 ;;;; Parser
26 (defvar *datatype-library*)
27 (defvar *namespace-uri*)
28 (defvar *entity-resolver*)
29 (defvar *external-href-stack*)
30 (defvar *include-uri-stack*)
32 (defvar *debug* nil)
34 (defun invoke-with-klacks-handler (fn source)
35 (if *debug*
36 (funcall fn)
37 (handler-case
38 (funcall fn)
39 (cxml:xml-parse-error (c)
40 (rng-error source "Cannot parse schema: ~A" c)))))
42 (defun parse-relax-ng (input &key entity-resolver)
43 (klacks:with-open-source (source (cxml:make-source input))
44 (invoke-with-klacks-handler
45 (lambda ()
46 (klacks:find-event source :start-element)
47 (let ((*datatype-library* "")
48 (*namespace-uri* "")
49 (*entity-resolver* entity-resolver)
50 (*external-href-stack* '())
51 (*include-uri-stack* '()))
52 (p/pattern source)))
53 source)))
56 ;;;; pattern structures
58 (defstruct pattern)
60 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
61 possibilities)
63 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
64 name)
66 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
67 children)
69 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
70 child)
72 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
73 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
74 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
75 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
76 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
77 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
78 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
79 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
81 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
83 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
85 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
86 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
88 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
89 datatype-library
90 type)
92 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
94 string)
96 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
97 params
98 except)
100 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
102 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
103 content)
106 ;;;; non-pattern
108 (defstruct param
109 name
110 string)
112 (defstruct start
113 combine
114 child)
116 (defstruct define
117 name
118 combine
119 children)
121 (defstruct div
122 content)
124 (defstruct include
125 href
126 content)
129 ;;;; parser
131 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
133 (defun skip-foreign* (source)
134 (loop
135 (case (klacks:peek-next source)
136 (:start-element (skip-foreign source))
137 (:end-element (return)))))
139 (defun skip-to-native (source)
140 (loop
141 (case (klacks:peek source)
142 (:start-element
143 (when (equal (klacks:current-uri source) *rng-namespace*)
144 (return))
145 (klacks:serialize-element source nil))
146 (:end-element (return)))
147 (klacks:consume source)))
149 (defun consume-and-skip-to-native (source)
150 (klacks:consume source)
151 (skip-to-native source))
153 (defun skip-foreign (source)
154 (when (equal (klacks:current-uri source) *rng-namespace*)
155 (rng-error source
156 "invalid schema: ~A not allowed here"
157 (klacks:current-lname source)))
158 (klacks:serialize-element source nil))
160 (defun attribute (lname attrs)
161 (let ((a (sax:find-attribute-ns "" lname attrs)))
162 (if a
163 (sax:attribute-value a)
164 nil)))
166 (defvar *whitespace*
167 (format nil "~C~C~C"
168 (code-char 9)
169 (code-char 32)
170 (code-char 13)
171 (code-char 10)))
173 (defun ntc (lname source-or-attrs)
174 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
175 (let* ((attrs
176 (if (listp source-or-attrs)
177 source-or-attrs
178 (klacks:list-attributes source-or-attrs)))
179 (a (sax:find-attribute-ns "" lname attrs)))
180 (if a
181 (string-trim *whitespace* (sax:attribute-value a))
182 nil)))
184 (defmacro with-library-and-ns (attrs &body body)
185 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
187 (defun invoke-with-library-and-ns (fn attrs)
188 (let* ((dl (attribute "datatypeLibrary" attrs))
189 (ns (attribute "ns" attrs))
190 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
191 (*namespace-uri* (or ns *namespace-uri*)))
192 (funcall fn)))
194 (defun p/pattern (source)
195 (let* ((lname (klacks:current-lname source))
196 (attrs (klacks:list-attributes source)))
197 (with-library-and-ns attrs
198 (case (find-symbol lname :keyword)
199 (:|element| (p/element source (ntc "name" attrs)))
200 (:|attribute| (p/attribute source (ntc "name" attrs)))
201 (:|group| (p/combination #'make-group source))
202 (:|interleave| (p/combination #'make-interleave source))
203 (:|choice| (p/combination #'make-choice source))
204 (:|optional| (p/combination #'make-optional source))
205 (:|zeroOrMore| (p/combination #'make-zero-or-more source))
206 (:|oneOrMore| (p/combination #'make-one-or-more source))
207 (:|list| (p/combination #'make-list-pattern source))
208 (:|mixed| (p/combination #'make-mixed source))
209 (:|ref| (p/ref source))
210 (:|parentRef| (p/parent-ref source))
211 (:|empty| (p/empty source))
212 (:|text| (p/text source))
213 (:|value| (p/value source))
214 (:|data| (p/data source))
215 (:|notAllowed| (p/not-allowed source))
216 (:|externalRef| (p/external-ref source))
217 (:|grammar| (p/grammar source))
218 (t (skip-foreign source))))))
220 (defun p/pattern+ (source)
221 (let ((children nil))
222 (loop
223 (case (klacks:peek source)
224 (:start-element
225 (let ((p (p/pattern source))) (when p (push p children))))
226 (:end-element
227 (return))
229 (klacks:consume source))))
230 (unless children
231 (rng-error source "empty element"))
232 (nreverse children)))
234 (defun p/pattern? (source)
235 (let ((result nil))
236 (loop
237 (skip-to-native source)
238 (case (klacks:peek source)
239 (:start-element
240 (when result
241 (rng-error source "at most one pattern expected here"))
242 (setf result (p/pattern source)))
243 (:end-element
244 (return))
246 (klacks:consume source))))
247 result))
249 (defun p/element (source name)
250 (klacks:expecting-element (source "element")
251 (let ((result (make-element)))
252 (consume-and-skip-to-native source)
253 (if name
254 (setf (pattern-name result)
255 (list :name name :uri *namespace-uri*))
256 (setf (pattern-name result) (p/name-class source)))
257 (skip-to-native source)
258 (setf (pattern-children result) (p/pattern+ source))
259 result)))
261 (defun p/attribute (source name)
262 (klacks:expecting-element (source "attribute")
263 (let ((result (make-attribute)))
264 (consume-and-skip-to-native source)
265 (if name
266 (setf (pattern-name result)
267 (list :name name :uri ""))
268 (setf (pattern-name result) (p/name-class source)))
269 (skip-to-native source)
270 (setf (pattern-child result) (p/pattern? source))
271 result)))
273 (defun p/combination (constructor source)
274 (klacks:expecting-element (source)
275 (consume-and-skip-to-native source)
276 (let ((possibilities (p/pattern+ source)))
277 (funcall constructor :possibilities possibilities))))
279 (defun p/ref (source)
280 (klacks:expecting-element (source "ref")
281 (prog1
282 (make-ref :name (ntc "name" source))
283 (skip-foreign* source))))
285 (defun p/parent-ref (source)
286 (klacks:expecting-element (source "parentRef")
287 (prog1
288 (make-parent-ref :name (ntc "name" source))
289 (skip-foreign* source))))
291 (defun p/empty (source)
292 (klacks:expecting-element (source "empty")
293 (skip-foreign* source)
294 (make-empty)))
296 (defun p/text (source)
297 (klacks:expecting-element (source "text")
298 (skip-foreign* source)
299 (make-text)))
301 (defun consume-and-parse-characters (source)
302 ;; fixme
303 (let ((tmp ""))
304 (loop
305 (multiple-value-bind (key data) (klacks:peek-next source)
306 (case key
307 (:characters
308 (setf tmp (concatenate 'string tmp data)))
309 (:end-element (return)))))
310 tmp))
312 (defun p/value (source)
313 (klacks:expecting-element (source "value")
314 (let* ((type (ntc "type" source))
315 (string (consume-and-parse-characters source))
316 (ns *namespace-uri*)
317 (dl *datatype-library*))
318 (unless type
319 (setf type "token")
320 (setf dl ""))
321 (make-value :string string :type type :ns ns :datatype-library dl))))
323 (defun p/data (source)
324 (klacks:expecting-element (source "data")
325 (let* ((type (ntc "type" source))
326 (result (make-data :type type
327 :datatype-library *datatype-library*
329 (params '()))
330 (loop
331 (multiple-value-bind (key uri lname)
332 (klacks:peek-next source)
334 (case key
335 (:start-element
336 (case (find-symbol lname :keyword)
337 (:|param| (push (p/param source) params))
338 (:|except|
339 (setf (pattern-except result) (p/except-pattern source))
340 (skip-to-native source)
341 (return))
342 (t (skip-foreign source))))
343 (:end-element
344 (return)))))
345 (setf (pattern-params result) (nreverse params))
346 result)))
348 (defun p/param (source)
349 (klacks:expecting-element (source "param")
350 (let ((name (ntc "name" source))
351 (string (consume-and-parse-characters source)))
352 (make-param :name name :string string))))
354 (defun p/except-pattern (source)
355 (klacks:expecting-element (source "except")
356 (with-library-and-ns (klacks:list-attributes source)
357 (klacks:consume source)
358 (p/pattern+ source))))
360 (defun p/not-allowed (source)
361 (klacks:expecting-element (source "notAllowed")
362 (consume-and-skip-to-native source)
363 (make-not-allowed)))
365 (defun safe-parse-uri (source str &optional base)
366 (when (zerop (length str))
367 (rng-error source "missing URI"))
368 (handler-case
369 (if base
370 (puri:merge-uris str base)
371 (puri:parse-uri str))
372 (puri:uri-parse-error ()
373 (rng-error source "invalid URI: ~A" str))))
375 (defun p/external-ref (source)
376 (klacks:expecting-element (source "externalRef")
377 (let* ((href
378 (escape-uri (attribute "href" (klacks:list-attributes source))))
379 (base (klacks:current-xml-base source))
380 (uri (safe-parse-uri source href base)))
381 (when (find uri *include-uri-stack* :test #'puri:uri=)
382 (rng-error source "looping include"))
383 (prog1
384 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
385 (xstream
386 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
387 (klacks:with-open-source (source (cxml:make-source xstream))
388 (invoke-with-klacks-handler
389 (lambda ()
390 (klacks:find-event source :start-element)
391 (let ((*datatype-library* ""))
392 (p/pattern source)))
393 source)))
394 (skip-foreign* source)))))
396 (defun p/grammar (source)
397 (klacks:expecting-element (source "grammar")
398 (consume-and-skip-to-native source)
399 (make-grammar :content (p/grammar-content* source))))
401 (defun p/grammar-content* (source &key disallow-include)
402 (let ((content nil))
403 (loop
404 (multiple-value-bind (key uri lname) (klacks:peek source)
406 (case key
407 (:start-element
408 (with-library-and-ns (klacks:list-attributes source)
409 (case (find-symbol lname :keyword)
410 (:|start| (push (p/start source) content))
411 (:|define| (push (p/define source) content))
412 (:|div| (push (p/div source) content))
413 (:|include|
414 (when disallow-include
415 (rng-error source "nested include not permitted"))
416 (push (p/include source) content))
417 (t (skip-foreign source)))))
418 (:end-element (return))))
419 (klacks:consume source))
420 (nreverse content)))
422 (defun p/start (source)
423 (klacks:expecting-element (source "start")
424 (let ((combine (ntc "combine" source))
425 (child
426 (progn
427 (consume-and-skip-to-native source)
428 (p/pattern source))))
429 (skip-foreign* source)
430 (make-start :combine (find-symbol (string-upcase combine) :keyword)
431 :child child))))
433 (defun p/define (source)
434 (klacks:expecting-element (source "define")
435 (let ((name (ntc "name" source))
436 (combine (ntc "combine" source))
437 (children (progn
438 (consume-and-skip-to-native source)
439 (p/pattern+ source))))
440 (make-define :name name
441 :combine (find-symbol (string-upcase combine) :keyword)
442 :children children))))
444 (defun p/div (source)
445 (klacks:expecting-element (source "div")
446 (consume-and-skip-to-native source)
447 (make-div :content (p/grammar-content* source))))
449 (defun p/include (source)
450 (klacks:expecting-element (source "include")
451 (let* ((href
452 (escape-uri (attribute "href" (klacks:list-attributes source))))
453 (base (klacks:current-xml-base source))
454 (uri (safe-parse-uri source href base))
455 (include-content
456 (progn
457 (consume-and-skip-to-native source)
458 (p/grammar-content* source :disallow-include t))))
459 (when (find uri *include-uri-stack* :test #'puri:uri=)
460 (rng-error source "looping include"))
461 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
462 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
463 (grammar
464 (klacks:with-open-source (source (cxml:make-source xstream))
465 (invoke-with-klacks-handler
466 (lambda ()
467 (klacks:find-event source :start-element)
468 (let ((*datatype-library* ""))
469 (p/grammar source)))
470 source)))
471 (grammar-content (pattern-content grammar)))
472 (make-div :content
473 (cons (make-div :content
474 (simplify-include source
475 grammar-content
476 include-content))
477 include-content))))))
479 (defun simplify-include/map (fn l)
480 (loop
481 for x in l
482 for value = (let ((result (funcall fn x)))
483 (when (typep x 'div)
484 (loop
485 for x in (div-content x)
486 for value = (funcall fn x)
487 when value
488 collect value into content
489 finally
490 (setf (div-content x) content)))
491 result)
492 when value
493 collect value))
495 (defun simplify-include/start (source grammar-content include-content)
496 (let ((startp
497 (block nil
498 (simplify-include/map (lambda (x)
499 (when (typep x 'start)
500 (return t))
502 include-content)
503 nil)))
504 (if startp
505 (let ((ok nil))
506 (prog1
507 (simplify-include/map (lambda (x)
508 (cond
509 ((typep x 'start) (setf ok t) nil)
510 (t x)))
511 grammar-content))
512 (unless ok
513 (rng-error source "expected start in grammar")))
514 grammar-content)))
516 (defun simplify-include/define (source grammar-content include-content)
517 (let ((defines '()))
518 (simplify-include/map (lambda (x)
519 (when (typep x 'define)
520 (push (cons x nil) defines))
522 include-content)
523 (prog1
524 (simplify-include/map
525 (lambda (x)
526 (if (typep x 'define)
527 (let ((cons (find (define-name x)
528 defines
529 :key (lambda (y) (define-name (car y)))
530 :test #'equal)))
531 (cond
532 (cons
533 (setf (cdr cons) t)
534 nil)
536 x)))
538 grammar-content)
539 (loop for (define . okp) in defines do
540 (unless okp
541 (rng-error source "expected matching ~A in grammar" define))))))
543 (defun simplify-include (source grammar-content include-content)
544 (simplify-include/define
545 source
546 (simplify-include/start source grammar-content include-content)
547 include-content))
549 (defun p/name-class (source)
550 (klacks:expecting-element (source)
551 (with-library-and-ns (klacks:list-attributes source)
552 (case (find-symbol (klacks:current-lname source) :keyword)
553 (:|name|
554 (let ((qname (string-trim *whitespace*
555 (consume-and-parse-characters source))))
556 (multiple-value-bind (uri lname)
557 (klacks:decode-qname qname source)
558 (list :name lname :uri (or uri *namespace-uri*)))))
559 (:|anyName|
560 (klacks:consume source)
561 (prog1
562 (cons :any (p/except-name-class? source))
563 (skip-to-native source)))
564 (:|nsName|
565 (let ((uri *namespace-uri*))
566 (klacks:consume source)
567 (prog1
568 (list :nsname (p/except-name-class? source) :uri uri)
569 (skip-to-native source))))
570 (:|choice|
571 (klacks:consume source)
572 (cons :choice (p/name-class* source)))
574 (rng-error source "invalid child in except"))))))
576 (defun p/name-class* (source)
577 (let ((results nil))
578 (loop
579 (skip-to-native source)
580 (case (klacks:peek source)
581 (:start-element (push (p/name-class source) results))
582 (:end-element (return)))
583 (klacks:consume source))
584 (nreverse results)))
586 (defun p/except-name-class? (source)
587 (skip-to-native source)
588 (multiple-value-bind (key uri lname)
589 (klacks:peek source)
591 (if (and (eq key :start-element)
592 (string= (find-symbol lname :keyword) "except"))
593 (p/except-name-class source)
594 nil)))
596 (defun p/except-name-class (source)
597 (klacks:expecting-element (source "except")
598 (with-library-and-ns (klacks:list-attributes source)
599 (klacks:consume source)
600 (cons :except (p/name-class* source)))))
602 (defun escape-uri (string)
603 (with-output-to-string (out)
604 (loop for c across (cxml::rod-to-utf8-string string) do
605 (let ((code (char-code c)))
606 ;; http://www.w3.org/TR/xlink/#link-locators
607 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
608 (format out "%~2,'0X" code)
609 (write-char c out))))))
612 ;;;; simplification
614 ;;; 4.1 Annotations
615 ;;; Foreign attributes and elements are removed implicitly while parsing.
617 ;;; 4.2 Whitespace
618 ;;; All character data is discarded while parsing (which can only be
619 ;;; whitespace after validation).
621 ;;; Whitespace in name, type, and combine attributes is stripped while
622 ;;; parsing. Ditto for <name/>.
624 ;;; 4.3. datatypeLibrary attribute
625 ;;; Escaping is done by p/pattern.
626 ;;; Attribute value defaulting is done using *datatype-library*; only
627 ;;; p/data and p/value record the computed value.
629 ;;; 4.4. type attribute of value element
630 ;;; Done by p/value.
632 ;;; 4.5. href attribute
633 ;;; Escaping is done by p/include and p/external-ref.
635 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
636 ;;; but that requires xstream hacking.
638 ;;; 4.6. externalRef element
639 ;;; Done by p/external-ref.
641 ;;; 4.7. include element
642 ;;; Done by p/include.
644 ;;; 4.8. name attribute of element and attribute elements
645 ;;; `name' is stored as a slot, not a child. Done by p/element and
646 ;;; p/attribute.
648 ;;; 4.9. ns attribute
649 ;;; done by p/name-class, p/value, p/element, p/attribute
651 ;;; 4.10. QNames
652 ;;; done by p/name-class
654 ;;;; tests
656 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
657 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
658 (let ((pass 0)
659 (total 0))
660 (dolist (d (directory p))
661 (let ((name (car (last (pathname-directory d)))))
662 (when (parse-integer name :junk-allowed t)
663 (incf total)
664 (when (test1 d)
665 (incf pass)))))
666 (format t "Passed ~D/~D tests.~%" pass total))
667 (dribble))
669 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
670 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
672 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
673 (let* ((*debug* t)
674 (d (merge-pathnames (format nil "~3,'0D/" n) p))
675 (i (merge-pathnames "i.rng" d))
676 (c (merge-pathnames "c.rng" d))
677 (rng (if (probe-file c) c i)))
678 (format t "~A: " (car (last (pathname-directory d))))
679 (print rng)
680 (parse-relax-ng rng)))
682 (defun test1 (d)
683 (let* ((i (merge-pathnames "i.rng" d))
684 (c (merge-pathnames "c.rng" d)))
685 (format t "~A: " (car (last (pathname-directory d))))
686 (if (probe-file c)
687 (handler-case
688 (progn
689 (parse-relax-ng c)
690 (format t " PASS~%")
692 (error (c)
693 (format t " FAIL: ~A~%" c)
694 nil))
695 (handler-case
696 (progn
697 (parse-relax-ng i)
698 (format t " FAIL: didn't detect invalid schema~%")
699 nil)
700 (rng-error (c)
701 (format t " PASS: ~S~%" (type-of c))
703 (error (c)
704 (format t " FAIL: incorrect condition type: ~A~%" c)
705 nil)))))