alle c-tests bestanden
[cxml-rng.git] / parse.lisp
blob7285c477595ca9be2c2ec82af7884d82be0c533e
1 (in-package :cxml-rng)
3 #+sbcl
4 (declaim (optimize (debug 2)))
7 ;;;; Errors
9 (define-condition rng-error (simple-error) ())
11 (defun rng-error (source fmt &rest args)
12 (let ((s (make-string-output-stream)))
13 (apply #'format s fmt args)
14 (when source
15 (format s "~& [ Error at line ~D, column ~D in ~S ]"
16 (klacks:current-line-number source)
17 (klacks:current-column-number source)
18 (klacks:current-system-id source)))
19 (error 'rng-error
20 :format-control "~A"
21 :format-arguments (list (get-output-stream-string s)))))
24 ;;;; Parser
26 (defvar *datatype-library*)
27 (defvar *entity-resolver*)
28 (defvar *external-href-stack*)
29 (defvar *include-uri-stack*)
31 (defvar *debug* nil)
33 (defun invoke-with-klacks-handler (fn source)
34 (if *debug*
35 (funcall fn)
36 (handler-case
37 (funcall fn)
38 (cxml:xml-parse-error (c)
39 (rng-error source "Cannot parse schema: ~A" c)))))
41 (defun parse-relax-ng (input &key entity-resolver)
42 (klacks:with-open-source (source (cxml:make-source input))
43 (invoke-with-klacks-handler
44 (lambda ()
45 (klacks:find-event source :start-element)
46 (let ((*datatype-library* "")
47 (*entity-resolver* entity-resolver)
48 (*external-href-stack* '())
49 (*include-uri-stack* '()))
50 (p/pattern source)))
51 source)))
54 ;;;; pattern structures
56 (defstruct pattern
57 ns)
59 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
60 possibilities)
62 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
63 name)
65 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
66 children)
68 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
69 child)
71 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
72 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
73 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
74 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
75 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
76 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
77 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
78 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
80 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
82 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
84 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
85 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
87 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
88 datatype-library
89 type)
91 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
92 string)
94 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
95 params
96 except)
98 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
100 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
101 content)
104 ;;;; non-pattern
106 (defstruct param
107 name
108 string)
110 (defstruct start
111 combine
112 child)
114 (defstruct define
115 name
116 combine
117 children)
119 (defstruct div
120 content)
122 (defstruct include
123 href
124 content)
127 ;;;; parser
129 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
131 (defun skip-foreign* (source)
132 (loop
133 (case (klacks:peek-next source)
134 (:start-element (skip-foreign source))
135 (:end-element (return)))))
137 (defun skip-to-native (source)
138 (loop
139 (case (klacks:peek source)
140 (:start-element
141 (when (equal (klacks:current-uri source) *rng-namespace*)
142 (return))
143 (klacks:serialize-element source nil))
144 (:end-element (return)))
145 (klacks:consume source)))
147 (defun consume-and-skip-to-native (source)
148 (klacks:consume source)
149 (skip-to-native source))
151 (defun skip-foreign (source)
152 (when (equal (klacks:current-uri source) *rng-namespace*)
153 (rng-error source
154 "invalid schema: ~A not allowed here"
155 (klacks:current-lname source)))
156 (klacks:serialize-element source nil))
158 (defun attribute (lname attrs)
159 (let ((a (sax:find-attribute-ns "" lname attrs)))
160 (if a
161 (sax:attribute-value a)
162 nil)))
164 (defvar *whitespace*
165 (format nil "~C~C~C"
166 (code-char 9)
167 (code-char 32)
168 (code-char 13)
169 (code-char 10)))
171 (defun ntc (lname source-or-attrs)
172 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
173 (let* ((attrs
174 (if (listp source-or-attrs)
175 source-or-attrs
176 (klacks:list-attributes source-or-attrs)))
177 (a (sax:find-attribute-ns "" lname attrs)))
178 (if a
179 (string-trim *whitespace* (sax:attribute-value a))
180 nil)))
182 (defmacro with-datatype-library (attrs &body body)
183 `(invoke-with-datatype-library (lambda () ,@body) ,attrs))
185 (defun invoke-with-datatype-library (fn attrs)
186 (let* ((dl (attribute "datatypeLibrary" attrs))
187 (*datatype-library* (if dl (escape-uri dl) *datatype-library*)))
188 (funcall fn)))
190 (defun p/pattern (source)
191 (let* ((lname (klacks:current-lname source))
192 (attrs (klacks:list-attributes source))
193 (ns (attribute "ns" attrs)))
194 (with-datatype-library attrs
195 (case (find-symbol lname :keyword)
196 (:|element| (p/element source (ntc "name" attrs) ns))
197 (:|attribute| (p/attribute source (ntc "name" attrs) ns))
198 (:|group| (p/combination #'make-group source ns))
199 (:|interleave| (p/combination #'make-interleave source ns))
200 (:|choice| (p/combination #'make-choice source ns))
201 (:|optional| (p/combination #'make-optional source ns))
202 (:|zeroOrMore| (p/combination #'make-zero-or-more source ns))
203 (:|oneOrMore| (p/combination #'make-one-or-more source ns))
204 (:|list| (p/combination #'make-list-pattern source ns))
205 (:|mixed| (p/combination #'make-mixed source ns))
206 (:|ref| (p/ref source ns))
207 (:|parentRef| (p/parent-ref source ns))
208 (:|empty| (p/empty source ns))
209 (:|text| (p/text source ns))
210 (:|value| (p/value source ns))
211 (:|data| (p/data source ns))
212 (:|notAllowed| (p/not-allowed source ns))
213 (:|externalRef| (p/external-ref source ns))
214 (:|grammar| (p/grammar source ns))
215 (t (skip-foreign source))))))
217 (defun p/pattern+ (source)
218 (let ((children nil))
219 (loop
220 (case (klacks:peek source)
221 (:start-element
222 (let ((p (p/pattern source))) (when p (push p children))))
223 (:end-element
224 (return))
226 (klacks:consume source))))
227 (unless children
228 (rng-error source "empty element"))
229 (nreverse children)))
231 (defun p/pattern? (source)
232 (let ((result nil))
233 (loop
234 (skip-to-native source)
235 (case (klacks:peek source)
236 (:start-element
237 (when result
238 (rng-error source "at most one pattern expected here"))
239 (setf result (p/pattern source)))
240 (:end-element
241 (return))
243 (klacks:consume source))))
244 result))
246 (defun p/element (source name ns)
247 (klacks:expecting-element (source "element")
248 (let ((result (make-element :ns ns)))
249 (consume-and-skip-to-native source)
250 (if name
251 (setf (pattern-name result) (list :name name))
252 (setf (pattern-name result) (p/name-class source)))
253 (skip-to-native source)
254 (setf (pattern-children result) (p/pattern+ source))
255 result)))
257 (defun p/attribute (source name ns)
258 (klacks:expecting-element (source "attribute")
259 (let ((result (make-attribute :ns ns)))
260 (consume-and-skip-to-native source)
261 (if name
262 (setf (pattern-name result) (list :name name))
263 (setf (pattern-name result) (p/name-class source)))
264 (skip-to-native source)
265 (setf (pattern-child result) (p/pattern? source))
266 result)))
268 (defun p/combination (constructor source ns)
269 (klacks:expecting-element (source)
270 (consume-and-skip-to-native source)
271 (let ((possibilities (p/pattern+ source)))
272 (funcall constructor :possibilities possibilities :ns ns))))
274 (defun p/ref (source ns)
275 (klacks:expecting-element (source "ref")
276 (prog1
277 (make-ref :name (ntc "name" source) :ns ns)
278 (skip-foreign* source))))
280 (defun p/parent-ref (source ns)
281 (klacks:expecting-element (source "parentRef")
282 (prog1
283 (make-parent-ref :name (ntc "name" source) :ns ns)
284 (skip-foreign* source))))
286 (defun p/empty (source ns)
287 (klacks:expecting-element (source "empty")
288 (skip-foreign* source)
289 (make-empty :ns ns)))
291 (defun p/text (source ns)
292 (klacks:expecting-element (source "text")
293 (skip-foreign* source)
294 (make-text :ns ns)))
296 (defun consume-and-parse-characters (source)
297 ;; fixme
298 (let ((tmp ""))
299 (loop
300 (multiple-value-bind (key data) (klacks:peek-next source)
301 (case key
302 (:characters
303 (setf tmp (concatenate 'string tmp data)))
304 (:end-element (return)))))
305 tmp))
307 (defun p/value (source ns)
308 (klacks:expecting-element (source "value")
309 (let* ((type (ntc "type" source))
310 (string (consume-and-parse-characters source))
311 (dl *datatype-library*))
312 (unless type
313 (setf type "token")
314 (setf dl ""))
315 (make-value :string string :type type :datatype-library dl :ns ns))))
317 (defun p/data (source ns)
318 (klacks:expecting-element (source "data")
319 (let* ((type (ntc "type" source))
320 (result (make-data :type type
321 :datatype-library *datatype-library*
322 :ns ns))
323 (params '()))
324 (loop
325 (multiple-value-bind (key uri lname)
326 (klacks:peek-next source)
328 (case key
329 (:start-element
330 (case (find-symbol lname :keyword)
331 (:|param| (push (p/param source) params))
332 (:|except|
333 (setf (pattern-except result) (p/except-pattern source))
334 (skip-to-native source)
335 (return))
336 (t (skip-foreign source))))
337 (:end-element
338 (return)))))
339 (setf (pattern-params result) (nreverse params))
340 result)))
342 (defun p/param (source)
343 (klacks:expecting-element (source "param")
344 (let ((name (ntc "name" source))
345 (string (consume-and-parse-characters source)))
346 (make-param :name name :string string))))
348 (defun p/except-pattern (source)
349 (klacks:expecting-element (source "except")
350 (with-datatype-library (klacks:list-attributes source)
351 (klacks:consume source)
352 (p/pattern+ source))))
354 (defun p/not-allowed (source ns)
355 (klacks:expecting-element (source "notAllowed")
356 (consume-and-skip-to-native source)
357 (make-not-allowed :ns ns)))
359 (defun safe-parse-uri (source str &optional base)
360 (when (zerop (length str))
361 (rng-error source "missing URI"))
362 (handler-case
363 (if base
364 (puri:merge-uris str base)
365 (puri:parse-uri str))
366 (puri:uri-parse-error ()
367 (rng-error source "invalid URI: ~A" str))))
369 (defun p/external-ref (source ns)
370 (klacks:expecting-element (source "externalRef")
371 (let* ((href
372 (escape-uri (attribute "href" (klacks:list-attributes source))))
373 (base (klacks:current-xml-base source))
374 (uri (safe-parse-uri source href base)))
375 (when (find uri *include-uri-stack* :test #'puri:uri=)
376 (rng-error source "looping include"))
377 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
378 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
379 (result
380 (klacks:with-open-source (source (cxml:make-source xstream))
381 (invoke-with-klacks-handler
382 (lambda ()
383 (klacks:find-event source :start-element)
384 (let ((*datatype-library* ""))
385 (p/pattern source)))
386 source))))
387 (unless (pattern-ns result)
388 (setf (pattern-ns result) ns))
389 (skip-foreign* source)
390 result))))
392 (defun p/grammar (source ns)
393 (klacks:expecting-element (source "grammar")
394 (consume-and-skip-to-native source)
395 (make-grammar :content (p/grammar-content* source) :ns ns)))
397 (defun p/grammar-content* (source &key disallow-include)
398 (let ((content nil))
399 (loop
400 (multiple-value-bind (key uri lname) (klacks:peek source)
402 (case key
403 (:start-element
404 (with-datatype-library (klacks:list-attributes source)
405 (case (find-symbol lname :keyword)
406 (:|start| (push (p/start source) content))
407 (:|define| (push (p/define source) content))
408 (:|div| (push (p/div source) content))
409 (:|include|
410 (when disallow-include
411 (rng-error source "nested include not permitted"))
412 (push (p/include source) content))
413 (t (skip-foreign source)))))
414 (:end-element (return))))
415 (klacks:consume source))
416 (nreverse content)))
418 (defun p/start (source)
419 (klacks:expecting-element (source "start")
420 (let ((combine (ntc "combine" source))
421 (child
422 (progn
423 (consume-and-skip-to-native source)
424 (p/pattern source))))
425 (skip-foreign* source)
426 (make-start :combine (find-symbol (string-upcase combine) :keyword)
427 :child child))))
429 (defun p/define (source)
430 (klacks:expecting-element (source "define")
431 (let ((name (ntc "name" source))
432 (combine (ntc "combine" source))
433 (children (progn
434 (consume-and-skip-to-native source)
435 (p/pattern+ source))))
436 (make-define :name name
437 :combine (find-symbol (string-upcase combine) :keyword)
438 :children children))))
440 (defun p/div (source)
441 (klacks:expecting-element (source "div")
442 (consume-and-skip-to-native source)
443 (make-div :content (p/grammar-content* source))))
445 (defun p/include (source)
446 (klacks:expecting-element (source "include")
447 (let* ((href
448 (escape-uri (attribute "href" (klacks:list-attributes source))))
449 (base (klacks:current-xml-base source))
450 (uri (safe-parse-uri source href base))
451 (include-content
452 (progn
453 (consume-and-skip-to-native source)
454 (p/grammar-content* source :disallow-include t))))
455 (when (find uri *include-uri-stack* :test #'puri:uri=)
456 (rng-error source "looping include"))
457 (let* ((*include-uri-stack* (cons uri *include-uri-stack*))
458 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
459 (grammar
460 (klacks:with-open-source (source (cxml:make-source xstream))
461 (invoke-with-klacks-handler
462 (lambda ()
463 (klacks:find-event source :start-element)
464 (let ((*datatype-library* ""))
465 (p/grammar source "wrong://")))
466 source)))
467 (grammar-content (pattern-content grammar)))
468 (make-div :content
469 (cons (make-div :content
470 (simplify-include source
471 grammar-content
472 include-content))
473 include-content))))))
475 (defun simplify-include/map (fn l)
476 (loop
477 for x in l
478 for value = (let ((result (funcall fn x)))
479 (when (typep x 'div)
480 (loop
481 for x in (div-content x)
482 for value = (funcall fn x)
483 when value
484 collect value into content
485 finally
486 (setf (div-content x) content)))
487 result)
488 when value
489 collect value))
491 (defun simplify-include/start (source grammar-content include-content)
492 (let ((startp
493 (block nil
494 (simplify-include/map (lambda (x)
495 (when (typep x 'start)
496 (return t))
498 include-content)
499 nil)))
500 (if startp
501 (let ((ok nil))
502 (prog1
503 (simplify-include/map (lambda (x)
504 (cond
505 ((typep x 'start) (setf ok t) nil)
506 (t x)))
507 grammar-content))
508 (unless ok
509 (rng-error source "expected start in grammar")))
510 grammar-content)))
512 (defun simplify-include/define (source grammar-content include-content)
513 (let ((defines '()))
514 (simplify-include/map (lambda (x)
515 (when (typep x 'define)
516 (push (cons x nil) defines))
518 include-content)
519 (prog1
520 (simplify-include/map
521 (lambda (x)
522 (if (typep x 'define)
523 (let ((cons (find (define-name x)
524 defines
525 :key (lambda (y) (define-name (car y)))
526 :test #'equal)))
527 (cond
528 (cons
529 (setf (cdr cons) t)
530 nil)
532 x)))
534 grammar-content)
535 (loop for (define . okp) in defines do
536 (unless okp
537 (rng-error source "expected matching ~A in grammar" define))))))
539 (defun simplify-include (source grammar-content include-content)
540 (simplify-include/define
541 source
542 (simplify-include/start source grammar-content include-content)
543 include-content))
545 (defun p/name-class (source)
546 (klacks:expecting-element (source)
547 (with-datatype-library (klacks:list-attributes source)
548 (case (find-symbol (klacks:current-lname source) :keyword)
549 (:|name|
550 (list :name (string-trim *whitespace*
551 (consume-and-parse-characters source))))
552 (:|anyName|
553 (klacks:consume source)
554 (prog1
555 (cons :any (p/except-name-class? source))
556 (skip-to-native source)))
557 (:|nsName|
558 (klacks:consume source)
559 (prog1
560 (cons :ns (p/except-name-class? source))
561 (skip-to-native source)))
562 (:|choice|
563 (klacks:consume source)
564 (cons :choice (p/name-class* source)))
566 (rng-error source "invalid child in except"))))))
568 (defun p/name-class* (source)
569 (let ((results nil))
570 (loop
571 (skip-to-native source)
572 (case (klacks:peek source)
573 (:start-element (push (p/name-class source) results))
574 (:end-element (return)))
575 (klacks:consume source))
576 (nreverse results)))
578 (defun p/except-name-class? (source)
579 (skip-to-native source)
580 (multiple-value-bind (key uri lname)
581 (klacks:peek source)
583 (if (and (eq key :start-element)
584 (string= (find-symbol lname :keyword) "except"))
585 (p/except-name-class source)
586 nil)))
588 (defun p/except-name-class (source)
589 (klacks:expecting-element (source "except")
590 (with-datatype-library (klacks:list-attributes source)
591 (klacks:consume source)
592 (cons :except (p/name-class* source)))))
594 (defun escape-uri (string)
595 (with-output-to-string (out)
596 (loop for c across (cxml::rod-to-utf8-string string) do
597 (let ((code (char-code c)))
598 ;; http://www.w3.org/TR/xlink/#link-locators
599 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
600 (format out "%~2,'0X" code)
601 (write-char c out))))))
604 ;;;; simplification
606 ;;; 4.1 Annotations
607 ;;; Foreign attributes and elements are removed implicitly while parsing.
609 ;;; 4.2 Whitespace
610 ;;; All character data is discarded while parsing (which can only be
611 ;;; whitespace after validation).
613 ;;; Whitespace in name, type, and combine attributes is stripped while
614 ;;; parsing. Ditto for <name/>.
616 ;;; 4.3. datatypeLibrary attribute
617 ;;; Escaping is done by p/pattern.
618 ;;; Attribute value defaulting is done using *datatype-library*; only
619 ;;; p/data and p/value record the computed value.
621 ;;; 4.4. type attribute of value element
622 ;;; Done by p/value.
624 ;;; 4.5. href attribute
625 ;;; Escaping is done by p/include and p/external-ref.
627 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
628 ;;; but that requires xstream hacking.
630 ;;; 4.6. externalRef element
631 ;;; Done by p/external-ref.
633 ;;; 4.7. include element
634 ;;; Done by p/include.
637 ;;;; tests
639 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
640 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
641 (let ((pass 0)
642 (total 0))
643 (dolist (d (directory p))
644 (let ((name (car (last (pathname-directory d)))))
645 (when (parse-integer name :junk-allowed t)
646 (incf total)
647 (when (test1 d)
648 (incf pass)))))
649 (format t "Passed ~D/~D tests.~%" pass total))
650 (dribble))
652 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
653 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
655 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
656 (let* ((*debug* t)
657 (d (merge-pathnames (format nil "~3,'0D/" n) p))
658 (i (merge-pathnames "i.rng" d))
659 (c (merge-pathnames "c.rng" d))
660 (rng (if (probe-file c) c i)))
661 (format t "~A: " (car (last (pathname-directory d))))
662 (print rng)
663 (parse-relax-ng rng)))
665 (defun test1 (d)
666 (let* ((i (merge-pathnames "i.rng" d))
667 (c (merge-pathnames "c.rng" d)))
668 (format t "~A: " (car (last (pathname-directory d))))
669 (if (probe-file c)
670 (handler-case
671 (progn
672 (parse-relax-ng c)
673 (format t " PASS~%")
675 (error (c)
676 (format t " FAIL: ~A~%" c)
677 nil))
678 (handler-case
679 (progn
680 (parse-relax-ng i)
681 (format t " FAIL: didn't detect invalid schema~%")
682 nil)
683 (rng-error (c)
684 (format t " PASS: ~S~%" (type-of c))
686 (error (c)
687 (format t " FAIL: incorrect condition type: ~A~%" c)
688 nil)))))