div raus
[cxml-rng.git] / parse.lisp
blob8b604b927ee33533ecbb3181a9bcdc0c9e10c387
1 (in-package :cxml-rng)
3 #+sbcl
4 (declaim (optimize (debug 2)))
7 ;;;; Errors
9 (define-condition rng-error (simple-error) ())
11 (defun rng-error (source fmt &rest args)
12 (let ((s (make-string-output-stream)))
13 (apply #'format s fmt args)
14 (when source
15 (format s "~& [ Error at line ~D, column ~D in ~S ]"
16 (klacks:current-line-number source)
17 (klacks:current-column-number source)
18 (klacks:current-system-id source)))
19 (error 'rng-error
20 :format-control "~A"
21 :format-arguments (list (get-output-stream-string s)))))
24 ;;;; Parser
26 (defvar *datatype-library*)
27 (defvar *namespace-uri*)
28 (defvar *entity-resolver*)
29 (defvar *external-href-stack*)
30 (defvar *include-uri-stack*)
32 (defvar *debug* nil)
34 (defun invoke-with-klacks-handler (fn source)
35 (if *debug*
36 (funcall fn)
37 (handler-case
38 (funcall fn)
39 (cxml:xml-parse-error (c)
40 (rng-error source "Cannot parse schema: ~A" c)))))
42 (defun parse-relax-ng (input &key entity-resolver)
43 (klacks:with-open-source (source (cxml:make-source input))
44 (invoke-with-klacks-handler
45 (lambda ()
46 (klacks:find-event source :start-element)
47 (let ((*datatype-library* "")
48 (*namespace-uri* "")
49 (*entity-resolver* entity-resolver)
50 (*external-href-stack* '())
51 (*include-uri-stack* '()))
52 (p/pattern source)))
53 source)))
56 ;;;; pattern structures
58 (defstruct pattern)
60 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
61 possibilities)
63 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
64 name)
66 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
67 children)
69 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
70 child)
72 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
73 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
74 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
75 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
76 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
77 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
78 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
79 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
81 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
83 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
85 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
86 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
88 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
89 datatype-library
90 type)
92 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
94 string)
96 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
97 params
98 except)
100 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
102 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
103 content)
106 ;;;; non-pattern
108 (defstruct param
109 name
110 string)
112 (defstruct start
113 combine
114 child)
116 (defstruct define
117 name
118 combine
119 children)
122 ;;;; parser
124 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
126 (defun skip-foreign* (source)
127 (loop
128 (case (klacks:peek-next source)
129 (:start-element (skip-foreign source))
130 (:end-element (return)))))
132 (defun skip-to-native (source)
133 (loop
134 (case (klacks:peek source)
135 (:start-element
136 (when (equal (klacks:current-uri source) *rng-namespace*)
137 (return))
138 (klacks:serialize-element source nil))
139 (:end-element (return)))
140 (klacks:consume source)))
142 (defun consume-and-skip-to-native (source)
143 (klacks:consume source)
144 (skip-to-native source))
146 (defun skip-foreign (source)
147 (when (equal (klacks:current-uri source) *rng-namespace*)
148 (rng-error source
149 "invalid schema: ~A not allowed here"
150 (klacks:current-lname source)))
151 (klacks:serialize-element source nil))
153 (defun attribute (lname attrs)
154 (let ((a (sax:find-attribute-ns "" lname attrs)))
155 (if a
156 (sax:attribute-value a)
157 nil)))
159 (defvar *whitespace*
160 (format nil "~C~C~C"
161 (code-char 9)
162 (code-char 32)
163 (code-char 13)
164 (code-char 10)))
166 (defun ntc (lname source-or-attrs)
167 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
168 (let* ((attrs
169 (if (listp source-or-attrs)
170 source-or-attrs
171 (klacks:list-attributes source-or-attrs)))
172 (a (sax:find-attribute-ns "" lname attrs)))
173 (if a
174 (string-trim *whitespace* (sax:attribute-value a))
175 nil)))
177 (defmacro with-library-and-ns (attrs &body body)
178 `(invoke-with-library-and-ns (lambda () ,@body) ,attrs))
180 (defun invoke-with-library-and-ns (fn attrs)
181 (let* ((dl (attribute "datatypeLibrary" attrs))
182 (ns (attribute "ns" attrs))
183 (*datatype-library* (if dl (escape-uri dl) *datatype-library*))
184 (*namespace-uri* (or ns *namespace-uri*)))
185 (funcall fn)))
187 (defun p/pattern (source)
188 (let* ((lname (klacks:current-lname source))
189 (attrs (klacks:list-attributes source)))
190 (with-library-and-ns attrs
191 (case (find-symbol lname :keyword)
192 (:|element| (p/element source (ntc "name" attrs)))
193 (:|attribute| (p/attribute source (ntc "name" attrs)))
194 (:|group| (p/combination #'make-group source))
195 (:|interleave| (p/combination #'make-interleave source))
196 (:|choice| (p/combination #'make-choice source))
197 (:|optional| (p/combination #'make-optional source))
198 (:|zeroOrMore| (p/combination #'make-zero-or-more source))
199 (:|oneOrMore| (p/combination #'make-one-or-more source))
200 (:|list| (p/combination #'make-list-pattern source))
201 (:|mixed| (p/combination #'make-mixed source))
202 (:|ref| (p/ref source))
203 (:|parentRef| (p/parent-ref source))
204 (:|empty| (p/empty source))
205 (:|text| (p/text source))
206 (:|value| (p/value source))
207 (:|data| (p/data source))
208 (:|notAllowed| (p/not-allowed source))
209 (:|externalRef| (p/external-ref source))
210 (:|grammar| (p/grammar source))
211 (t (skip-foreign source))))))
213 (defun p/pattern+ (source)
214 (let ((children nil))
215 (loop
216 (case (klacks:peek source)
217 (:start-element
218 (let ((p (p/pattern source))) (when p (push p children))))
219 (:end-element
220 (return))
222 (klacks:consume source))))
223 (unless children
224 (rng-error source "empty element"))
225 (nreverse children)))
227 (defun p/pattern? (source)
228 (let ((result nil))
229 (loop
230 (skip-to-native source)
231 (case (klacks:peek source)
232 (:start-element
233 (when result
234 (rng-error source "at most one pattern expected here"))
235 (setf result (p/pattern source)))
236 (:end-element
237 (return))
239 (klacks:consume source))))
240 result))
242 (defun p/element (source name)
243 (klacks:expecting-element (source "element")
244 (let ((result (make-element)))
245 (consume-and-skip-to-native source)
246 (if name
247 (setf (pattern-name result)
248 (list :name name :uri *namespace-uri*))
249 (setf (pattern-name result) (p/name-class source)))
250 (skip-to-native source)
251 (setf (pattern-children result) (p/pattern+ source))
252 result)))
254 (defun p/attribute (source name)
255 (klacks:expecting-element (source "attribute")
256 (let ((result (make-attribute)))
257 (consume-and-skip-to-native source)
258 (if name
259 (setf (pattern-name result)
260 (list :name name :uri ""))
261 (setf (pattern-name result) (p/name-class source)))
262 (skip-to-native source)
263 (setf (pattern-child result) (p/pattern? source))
264 result)))
266 (defun p/combination (constructor source)
267 (klacks:expecting-element (source)
268 (consume-and-skip-to-native source)
269 (let ((possibilities (p/pattern+ source)))
270 (funcall constructor :possibilities possibilities))))
272 (defun p/ref (source)
273 (klacks:expecting-element (source "ref")
274 (prog1
275 (make-ref :name (ntc "name" source))
276 (skip-foreign* source))))
278 (defun p/parent-ref (source)
279 (klacks:expecting-element (source "parentRef")
280 (prog1
281 (make-parent-ref :name (ntc "name" source))
282 (skip-foreign* source))))
284 (defun p/empty (source)
285 (klacks:expecting-element (source "empty")
286 (skip-foreign* source)
287 (make-empty)))
289 (defun p/text (source)
290 (klacks:expecting-element (source "text")
291 (skip-foreign* source)
292 (make-text)))
294 (defun consume-and-parse-characters (source)
295 ;; fixme
296 (let ((tmp ""))
297 (loop
298 (multiple-value-bind (key data) (klacks:peek-next source)
299 (case key
300 (:characters
301 (setf tmp (concatenate 'string tmp data)))
302 (:end-element (return)))))
303 tmp))
305 (defun p/value (source)
306 (klacks:expecting-element (source "value")
307 (let* ((type (ntc "type" source))
308 (string (consume-and-parse-characters source))
309 (ns *namespace-uri*)
310 (dl *datatype-library*))
311 (unless type
312 (setf type "token")
313 (setf dl ""))
314 (make-value :string string :type type :ns ns :datatype-library dl))))
316 (defun p/data (source)
317 (klacks:expecting-element (source "data")
318 (let* ((type (ntc "type" source))
319 (result (make-data :type type
320 :datatype-library *datatype-library*
322 (params '()))
323 (loop
324 (multiple-value-bind (key uri lname)
325 (klacks:peek-next source)
327 (case key
328 (:start-element
329 (case (find-symbol lname :keyword)
330 (:|param| (push (p/param source) params))
331 (:|except|
332 (setf (pattern-except result) (p/except-pattern source))
333 (skip-to-native source)
334 (return))
335 (t (skip-foreign source))))
336 (:end-element
337 (return)))))
338 (setf (pattern-params result) (nreverse params))
339 result)))
341 (defun p/param (source)
342 (klacks:expecting-element (source "param")
343 (let ((name (ntc "name" source))
344 (string (consume-and-parse-characters source)))
345 (make-param :name name :string string))))
347 (defun p/except-pattern (source)
348 (klacks:expecting-element (source "except")
349 (with-library-and-ns (klacks:list-attributes source)
350 (klacks:consume source)
351 (p/pattern+ source))))
353 (defun p/not-allowed (source)
354 (klacks:expecting-element (source "notAllowed")
355 (consume-and-skip-to-native source)
356 (make-not-allowed)))
358 (defun safe-parse-uri (source str &optional base)
359 (when (zerop (length str))
360 (rng-error source "missing URI"))
361 (handler-case
362 (if base
363 (puri:merge-uris str base)
364 (puri:parse-uri str))
365 (puri:uri-parse-error ()
366 (rng-error source "invalid URI: ~A" str))))
368 (defun p/external-ref (source)
369 (klacks:expecting-element (source "externalRef")
370 (let* ((href
371 (escape-uri (attribute "href" (klacks:list-attributes source))))
372 (base (klacks:current-xml-base source))
373 (uri (safe-parse-uri source href base)))
374 (when (find uri *include-uri-stack* :test #'puri:uri=)
375 (rng-error source "looping include"))
376 (prog1
377 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
378 (xstream
379 (cxml::xstream-open-extid* *entity-resolver* nil uri)))
380 (klacks:with-open-source (source (cxml:make-source xstream))
381 (invoke-with-klacks-handler
382 (lambda ()
383 (klacks:find-event source :start-element)
384 (let ((*datatype-library* ""))
385 (p/pattern source)))
386 source)))
387 (skip-foreign* source)))))
389 (defun p/grammar (source)
390 (klacks:expecting-element (source "grammar")
391 (consume-and-skip-to-native source)
392 (make-grammar :content (p/grammar-content* source))))
394 (defun p/grammar-content* (source &key disallow-include)
395 (loop
396 append
397 (prog1
398 (multiple-value-bind (key uri lname) (klacks:peek source)
400 (case key
401 (:start-element
402 (with-library-and-ns (klacks:list-attributes source)
403 (case (find-symbol lname :keyword)
404 (:|start| (list (p/start source)))
405 (:|define| (list (p/define source)))
406 (:|div| (p/div source))
407 (:|include|
408 (when disallow-include
409 (rng-error source "nested include not permitted"))
410 (p/include source))
412 (skip-foreign source)
413 nil))))
414 (:end-element
415 (loop-finish))))
416 (klacks:consume source))))
418 (defun p/start (source)
419 (klacks:expecting-element (source "start")
420 (let ((combine (ntc "combine" source))
421 (child
422 (progn
423 (consume-and-skip-to-native source)
424 (p/pattern source))))
425 (skip-foreign* source)
426 (make-start :combine (find-symbol (string-upcase combine) :keyword)
427 :child child))))
429 (defun p/define (source)
430 (klacks:expecting-element (source "define")
431 (let ((name (ntc "name" source))
432 (combine (ntc "combine" source))
433 (children (progn
434 (consume-and-skip-to-native source)
435 (p/pattern+ source))))
436 (make-define :name name
437 :combine (find-symbol (string-upcase combine) :keyword)
438 :children children))))
440 (defun p/div (source)
441 (klacks:expecting-element (source "div")
442 (consume-and-skip-to-native source)
443 (p/grammar-content* source)))
445 (defun p/include (source)
446 (klacks:expecting-element (source "include")
447 (let* ((href
448 (escape-uri (attribute "href" (klacks:list-attributes source))))
449 (base (klacks:current-xml-base source))
450 (uri (safe-parse-uri source href base))
451 (include-content
452 (progn
453 (consume-and-skip-to-native source)
454 (p/grammar-content* source :disallow-include t))))
455 (when (find uri *include-uri-stack* :test #'puri:uri=)
456 (rng-error source "looping include"))
457 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
458 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
459 (grammar
460 (klacks:with-open-source (source (cxml:make-source xstream))
461 (invoke-with-klacks-handler
462 (lambda ()
463 (klacks:find-event source :start-element)
464 (let ((*datatype-library* ""))
465 (p/grammar source)))
466 source)))
467 (grammar-content (pattern-content grammar)))
468 (append
469 (simplify-include source grammar-content include-content)
470 include-content)))))
472 (defun simplify-include/map (fn l)
473 (remove nil (mapcar fn l)))
475 (defun simplify-include/start (source grammar-content include-content)
476 (let ((startp (some (lambda (x) (typep x 'start)) include-content)))
477 (if startp
478 (let ((ok nil))
479 (prog1
480 (remove-if (lambda (x)
481 (when (typep x 'start)
482 (setf ok t)
484 grammar-content)
485 (unless ok
486 (rng-error source "expected start in grammar"))))
487 grammar-content)))
489 (defun simplify-include/define (source grammar-content include-content)
490 (let ((defines '()))
491 (dolist (x include-content)
492 (when (typep x 'define)
493 (push (cons x nil) defines)))
494 (prog1
495 (remove-if (lambda (x)
496 (when (typep x 'define)
497 (let ((cons (find (define-name x)
498 defines
499 :key (lambda (y)
500 (define-name (car y)))
501 :test #'equal)))
502 (when cons
503 (setf (cdr cons) t)
504 t))))
505 grammar-content)
506 (loop for (define . okp) in defines do
507 (unless okp
508 (rng-error source "expected matching ~A in grammar" define))))))
510 (defun simplify-include (source grammar-content include-content)
511 (simplify-include/define
512 source
513 (simplify-include/start source grammar-content include-content)
514 include-content))
516 (defun p/name-class (source)
517 (klacks:expecting-element (source)
518 (with-library-and-ns (klacks:list-attributes source)
519 (case (find-symbol (klacks:current-lname source) :keyword)
520 (:|name|
521 (let ((qname (string-trim *whitespace*
522 (consume-and-parse-characters source))))
523 (multiple-value-bind (uri lname)
524 (klacks:decode-qname qname source)
525 (list :name lname :uri (or uri *namespace-uri*)))))
526 (:|anyName|
527 (klacks:consume source)
528 (prog1
529 (cons :any (p/except-name-class? source))
530 (skip-to-native source)))
531 (:|nsName|
532 (let ((uri *namespace-uri*))
533 (klacks:consume source)
534 (prog1
535 (list :nsname (p/except-name-class? source) :uri uri)
536 (skip-to-native source))))
537 (:|choice|
538 (klacks:consume source)
539 (cons :choice (p/name-class* source)))
541 (rng-error source "invalid child in except"))))))
543 (defun p/name-class* (source)
544 (let ((results nil))
545 (loop
546 (skip-to-native source)
547 (case (klacks:peek source)
548 (:start-element (push (p/name-class source) results))
549 (:end-element (return)))
550 (klacks:consume source))
551 (nreverse results)))
553 (defun p/except-name-class? (source)
554 (skip-to-native source)
555 (multiple-value-bind (key uri lname)
556 (klacks:peek source)
558 (if (and (eq key :start-element)
559 (string= (find-symbol lname :keyword) "except"))
560 (p/except-name-class source)
561 nil)))
563 (defun p/except-name-class (source)
564 (klacks:expecting-element (source "except")
565 (with-library-and-ns (klacks:list-attributes source)
566 (klacks:consume source)
567 (cons :except (p/name-class* source)))))
569 (defun escape-uri (string)
570 (with-output-to-string (out)
571 (loop for c across (cxml::rod-to-utf8-string string) do
572 (let ((code (char-code c)))
573 ;; http://www.w3.org/TR/xlink/#link-locators
574 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
575 (format out "%~2,'0X" code)
576 (write-char c out))))))
579 ;;;; simplification
581 ;;; 4.1 Annotations
582 ;;; Foreign attributes and elements are removed implicitly while parsing.
584 ;;; 4.2 Whitespace
585 ;;; All character data is discarded while parsing (which can only be
586 ;;; whitespace after validation).
588 ;;; Whitespace in name, type, and combine attributes is stripped while
589 ;;; parsing. Ditto for <name/>.
591 ;;; 4.3. datatypeLibrary attribute
592 ;;; Escaping is done by p/pattern.
593 ;;; Attribute value defaulting is done using *datatype-library*; only
594 ;;; p/data and p/value record the computed value.
596 ;;; 4.4. type attribute of value element
597 ;;; Done by p/value.
599 ;;; 4.5. href attribute
600 ;;; Escaping is done by p/include and p/external-ref.
602 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
603 ;;; but that requires xstream hacking.
605 ;;; 4.6. externalRef element
606 ;;; Done by p/external-ref.
608 ;;; 4.7. include element
609 ;;; Done by p/include.
611 ;;; 4.8. name attribute of element and attribute elements
612 ;;; `name' is stored as a slot, not a child. Done by p/element and
613 ;;; p/attribute.
615 ;;; 4.9. ns attribute
616 ;;; done by p/name-class, p/value, p/element, p/attribute
618 ;;; 4.10. QNames
619 ;;; done by p/name-class
621 ;;; 4.11. div element
622 ;;; Legen wir gar nicht erst an.
624 ;;;; tests
626 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
627 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
628 (let ((pass 0)
629 (total 0))
630 (dolist (d (directory p))
631 (let ((name (car (last (pathname-directory d)))))
632 (when (parse-integer name :junk-allowed t)
633 (incf total)
634 (when (test1 d)
635 (incf pass)))))
636 (format t "Passed ~D/~D tests.~%" pass total))
637 (dribble))
639 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
640 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
642 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
643 (let* ((*debug* t)
644 (d (merge-pathnames (format nil "~3,'0D/" n) p))
645 (i (merge-pathnames "i.rng" d))
646 (c (merge-pathnames "c.rng" d))
647 (rng (if (probe-file c) c i)))
648 (format t "~A: " (car (last (pathname-directory d))))
649 (print rng)
650 (parse-relax-ng rng)))
652 (defun test1 (d)
653 (let* ((i (merge-pathnames "i.rng" d))
654 (c (merge-pathnames "c.rng" d)))
655 (format t "~A: " (car (last (pathname-directory d))))
656 (if (probe-file c)
657 (handler-case
658 (progn
659 (parse-relax-ng c)
660 (format t " PASS~%")
662 (error (c)
663 (format t " FAIL: ~A~%" c)
664 nil))
665 (handler-case
666 (progn
667 (parse-relax-ng i)
668 (format t " FAIL: didn't detect invalid schema~%")
669 nil)
670 (rng-error (c)
671 (format t " PASS: ~S~%" (type-of c))
673 (error (c)
674 (format t " FAIL: incorrect condition type: ~A~%" c)
675 nil)))))