pattern? und combinations repariert
[cxml-rng.git] / parse.lisp
blobfe8e9ec9683bdcfc3002da2b0635c9324b084cb3
1 (in-package :cxml-rng)
4 ;;;; Errors
6 (define-condition rng-error (simple-error) ())
8 (defun rng-error (source fmt &rest args)
9 (let ((s (make-string-output-stream)))
10 (apply #'format s fmt args)
11 (when source
12 (format s "~& [ Error at line ~D, column ~D in ~S ]"
13 (klacks:current-line-number source)
14 (klacks:current-column-number source)
15 (klacks:current-system-id source)))
16 (error 'rng-error
17 :format-control "~A"
18 :format-arguments (list (get-output-stream-string s)))))
21 ;;;; Parser
23 (defvar *datatype-library*)
24 (defvar *entity-resolver*)
25 (defvar *external-href-stack*)
26 (defvar *include-href-stack*)
28 (defvar *debug* nil)
30 (defun invoke-with-klacks-handler (fn source)
31 (if *debug*
32 (funcall fn)
33 (handler-case
34 (funcall fn)
35 (cxml:xml-parse-error (c)
36 (rng-error source "Cannot parse schema: ~A" c)))))
38 (defun parse-relax-ng (input &key entity-resolver)
39 (klacks:with-open-source (source (cxml:make-source input))
40 (invoke-with-klacks-handler
41 (lambda ()
42 (klacks:find-event source :start-element)
43 (let ((*datatype-library* "")
44 (*entity-resolver* entity-resolver)
45 (*external-href-stack* '())
46 (*include-href-stack* '()))
47 (p/pattern source)))
48 source)))
51 ;;;; pattern structures
53 (defstruct pattern
54 ns)
56 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
57 possibilities)
59 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
60 name)
62 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
63 children)
65 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
66 child)
68 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
69 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
70 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
71 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
72 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
73 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
74 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
75 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
77 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
79 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
81 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
82 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
84 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
85 datatype-library)
87 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
88 string)
90 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
91 type
92 params
93 except)
95 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
97 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
98 content)
101 ;;;; non-pattern
103 (defstruct param
104 name
105 string)
107 (defstruct start
108 combine
109 child)
111 (defstruct define
112 name
113 combine
114 children)
116 (defstruct div
117 content)
119 (defstruct include
120 href
121 content)
124 ;;;; parser
126 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
128 (defun skip-foreign* (source)
129 (loop
130 (case (klacks:peek-next source)
131 (:start-element (skip-foreign source))
132 (:end-element (return)))))
134 (defun skip-foreign (source)
135 (when (equal (klacks:current-uri source) *rng-namespace*)
136 (rng-error source
137 "invalid schema: ~A not allowed here"
138 (klacks:current-lname source)))
139 (klacks:serialize-element source nil))
141 (defun attribute (lname attrs)
142 (let ((a (sax:find-attribute-ns "" lname attrs)))
143 (if a
144 (sax:attribute-value a)
145 nil)))
147 (defvar *whitespace*
148 (format nil "~C~C~C"
149 (code-char 9)
150 (code-char 32)
151 (code-char 13)
152 (code-char 10)))
154 (defun ntc (lname attrs)
155 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
156 (let ((a (sax:find-attribute-ns "" lname attrs)))
157 (if a
158 (string-trim *whitespace* (sax:attribute-value a))
159 nil)))
161 (defmacro with-datatype-library (attrs &body body)
162 `(invoke-with-datatype-library (lambda () ,@body) ,attrs))
164 (defun invoke-with-datatype-library (fn attrs)
165 (let* ((dl (attribute "datatypeLibrary" attrs))
166 (*datatype-library* (if dl (escape-uri dl) *datatype-library*)))
167 (funcall fn)))
169 (defun p/pattern (source)
170 (let* ((lname (klacks:current-lname source))
171 (attrs (klacks:list-attributes source))
172 (ns (attribute "ns" attrs)))
173 (with-datatype-library attrs
174 (case (find-symbol lname :keyword)
175 (:|element| (p/element source (ntc "name" attrs) ns))
176 (:|attribute| (p/attribute source (ntc "name" attrs) ns))
177 (:|group| (p/combination #'make-group source ns))
178 (:|interleave| (p/combination #'make-interleave source ns))
179 (:|choice| (p/combination #'make-choice source ns))
180 (:|optional| (p/combination #'make-optional source ns))
181 (:|zeroOrMore| (p/combination #'make-zero-or-more source ns))
182 (:|oneOrMore| (p/combination #'make-one-or-more source ns))
183 (:|list| (p/combination #'make-list-pattern source ns))
184 (:|mixed| (p/combination #'make-mixed source ns))
185 (:|ref| (p/ref source ns))
186 (:|parentRef| (p/parent-ref source ns))
187 (:|empty| (p/empty source ns))
188 (:|text| (p/text source ns))
189 (:|value| (p/value source ns))
190 (:|data| (p/data source ns))
191 (:|externalRef| (p/external-ref source ns))
192 (:|grammar| (p/grammar source ns))
193 (t (skip-foreign source))))))
195 (defun p/pattern+ (source)
196 (let ((children nil))
197 (loop
198 (case (klacks:peek-next source)
199 (:start-element
200 (let ((p (p/pattern source))) (when p (push p children))))
201 (:end-element (return))))
202 (unless children
203 (rng-error source "empty element"))
204 (nreverse children)))
206 (defun p/pattern? (source)
207 (let ((result nil))
208 (loop
209 (case (klacks:peek-next source)
210 (:start-element
211 (when result
212 (rng-error source "at most one pattern expected here"))
213 (setf result (p/pattern source)))
214 (:end-element
215 (return result))))))
217 (defun p/element (source name ns)
218 (klacks:expecting-element (source "element")
219 (let ((result (make-element :ns ns)))
220 (if name
221 (setf (pattern-name result) (list :name name))
222 (setf (pattern-name result) (p/name-class source)))
223 (setf (pattern-children result) (p/pattern+ source))
224 result)))
226 (defun p/attribute (source name ns)
227 (klacks:expecting-element (source "attribute")
228 (let ((result (make-attribute :ns ns)))
229 (if name
230 (setf (pattern-name result) (list :name name))
231 (setf (pattern-name result) (p/name-class source)))
232 (setf (pattern-child result) (p/pattern? source))
233 result)))
235 (defun p/combination (constructor source ns)
236 (klacks:expecting-element (source)
237 (let ((possibilities (p/pattern+ source)))
238 (funcall constructor :possibilities possibilities :ns ns))))
240 (defun p/ref (source ns)
241 (klacks:expecting-element (source "ref")
242 (make-ref :name (ntc "name" (klacks:list-attributes source))
243 :ns ns)))
245 (defun p/parent-ref (source ns)
246 (klacks:expecting-element (source "parentRef")
247 (make-parent-ref :name (ntc "name" (klacks:list-attributes source))
248 :ns ns)))
250 (defun p/empty (source ns)
251 (klacks:expecting-element (source "empty")
252 (skip-foreign* source)
253 (make-empty :ns ns)))
255 (defun p/text (source ns)
256 (klacks:expecting-element (source "text")
257 (skip-foreign* source)
258 (make-text :ns ns)))
260 (defun parse-characters (source)
261 ;; fixme
262 (let ((tmp ""))
263 (loop
264 (multiple-value-bind (key data) (klacks:peek-next source)
265 (case key
266 (:characters
267 (setf tmp (concatenate 'string tmp data)))
268 (:end-element (return)))))
269 tmp))
271 (defun p/value (source ns)
272 (klacks:expecting-element (source "value")
273 (let* ((type (ntc "type" (klacks:list-attributes source)))
274 (string (parse-characters source))
275 (dl *datatype-library*))
276 (unless type
277 (setf type "token")
278 (setf dl ""))
279 (make-value :string string :type type :datatype-library dl :ns ns))))
281 (defun p/data (source ns)
282 (klacks:expecting-element (source "data")
283 (let* ((type (ntc "type" (klacks:list-attributes source)))
284 (result (make-data :type type
285 :datatype-library *datatype-library*
286 :ns ns))
287 (params '()))
288 (loop
289 (multiple-value-bind (key lname)
290 (klacks:peek-next source)
291 (case key
292 (:start-element
293 (case (find-symbol lname :keyword)
294 (:|param| (push (p/param source) params))
295 (:|except|
296 (setf (pattern-except result) (p/except-pattern source))
297 (return))
298 (t (skip-foreign source))))
299 (:end-element
300 (return)))))
301 (setf (pattern-params result) (nreverse params))
302 result)))
304 (defun p/param (source)
305 (klacks:expecting-element (source "param")
306 (let ((name (ntc "name" (klacks:list-attributes source)))
307 (string (parse-characters source)))
308 (make-param :name name :string string))))
310 (defun p/except-pattern (source)
311 (klacks:expecting-element (source "except")
312 (with-datatype-library (klacks:list-attributes source)
313 (p/pattern+ source))))
315 (defun p/not-allowed (source ns)
316 (klacks:expecting-element (source "notAllowed")
317 (make-not-allowed :ns ns)))
319 (defun safe-parse-uri (source str &optional base)
320 (when (zerop (length str))
321 (rng-error source "missing URI"))
322 (handler-case
323 (if base
324 (puri:merge-uris str base)
325 (puri:parse-uri str))
326 (puri:uri-parse-error ()
327 (rng-error source "invalid URI: ~A" str))))
329 (defun p/external-ref (source ns)
330 (klacks:expecting-element (source "externalRef")
331 (let ((href
332 (escape-uri (attribute "href" (klacks:list-attributes source))))
333 (base (klacks:current-xml-base source)))
334 (when (find href *include-href-stack* :test #'string=)
335 (rng-error source "looping include"))
336 (let* ((*include-href-stack* (cons href *include-href-stack*))
337 (uri (safe-parse-uri source href base))
338 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
339 (result
340 (klacks:with-open-source (source (cxml:make-source xstream))
341 (invoke-with-klacks-handler
342 (lambda ()
343 (klacks:find-event source :start-element)
344 (let ((*datatype-library* ""))
345 (p/pattern source)))
346 source))))
347 (unless (pattern-ns result)
348 (setf (pattern-ns result) ns))
349 (skip-foreign* source)
350 result))))
352 (defun p/grammar (source ns)
353 (klacks:expecting-element (source "grammar")
354 (make-grammar :content (p/grammar-content* source) :ns ns)))
356 (defun p/grammar-content* (source &key disallow-include)
357 (let ((content nil))
358 (loop
359 (multiple-value-bind (key lname) (klacks:peek-next source)
360 (case key
361 (:start-element
362 (with-datatype-library (klacks:list-attributes source)
363 (case (find-symbol lname :keyword)
364 (:|start| (push (p/start source) content))
365 (:|define| (push (p/define source) content))
366 (:|div| (push (p/div source) content))
367 (:|include|
368 (when disallow-include
369 (rng-error source "nested include not permitted"))
370 (push (p/include source) content))
371 (t (skip-foreign source)))))
372 (:end-element (return)))))
373 (nreverse content)))
375 (defun p/start (source)
376 (klacks:expecting-element (source "start")
377 (let ((combine (ntc "combine" source))
378 (child (p/pattern source)))
379 (make-start :combine (find-symbol (string-upcase combine) :keyword)
380 :child child))))
382 (defun p/define (source)
383 (klacks:expecting-element (source "define")
384 (let ((name (ntc "name" source))
385 (combine (ntc "combine" source))
386 (children (p/pattern+ source)))
387 (make-define :name name
388 :combine (find-symbol (string-upcase combine) :keyword)
389 :children children))))
391 (defun p/div (source)
392 (klacks:expecting-element (source "div")
393 (make-div :content (p/grammar-content* source))))
395 (defun p/include (source)
396 (klacks:expecting-element (source "include")
397 (let ((href
398 (escape-uri (attribute "href" (klacks:list-attributes source))))
399 (base (klacks:current-xml-base source))
400 (include-content (p/grammar-content* source :disallow-include t)))
401 (when (find href *include-href-stack* :test #'string=)
402 (rng-error source "looping include"))
403 (let* ((*include-href-stack* (cons href *include-href-stack*))
404 (uri (safe-parse-uri source href base))
405 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
406 (grammar
407 (klacks:with-open-source (source (cxml:make-source xstream))
408 (invoke-with-klacks-handler
409 (lambda ()
410 (klacks:find-event source :start-element)
411 (let ((*datatype-library* ""))
412 (p/grammar source "wrong://")))
413 source)))
414 (grammar-content (pattern-content grammar)))
415 (klacks:consume source)
416 (make-div :children
417 (cons (make-div :children
418 (simplify-include source
419 grammar-content
420 include-content))
421 include-content))))))
423 (defun simplify-include/map (fn l)
424 (loop
425 for x in l
426 for value = (let ((result (funcall fn x)))
427 (when (typep x 'div)
428 (loop
429 for x in (div-content x)
430 for value = (funcall fn x)
431 when value
432 collect value into content
433 finally
434 (setf (div-content x) content)))
435 result)
436 when value
437 collect value))
439 (defun simplify-include/start (source grammar-content include-content)
440 (let ((startp
441 (block nil
442 (simplify-include/map (lambda (x)
443 (when (typep x 'start)
444 (return t))
446 include-content))))
447 (if startp
448 (let ((ok nil))
449 (prog1
450 (simplify-include/map (lambda (x)
451 (cond
452 ((typep x 'start) (setf ok t) nil)
453 (t x)))
454 grammar-content))
455 (unless ok
456 (rng-error source "expected start in grammar")))
457 grammar-content)))
459 (defun simplify-include/define (source grammar-content include-content)
460 (let ((defines '()))
461 (simplify-include/map (lambda (x)
462 (when (typep x 'define)
463 (push (cons x nil) defines))
465 include-content)
466 (prog1
467 (simplify-include/map
468 (lambda (x)
469 (if (typep x 'define)
470 (let ((cons (find (define-name x) defines :key #'car)))
471 (cond
472 (cons
473 (setf (cdr cons) t)
474 nil)
476 x)))
478 grammar-content)
479 (loop for (define . okp) in defines do
480 (unless okp
481 (rng-error source "expected matching ~A in grammar" define))))))
483 (defun simplify-include (source grammar-content include-content)
484 (simplify-include/define
485 source
486 (simplify-include/start source grammar-content include-content)
487 include-content))
489 (defun p/name-class (source)
490 (klacks:expecting-element (source)
491 (with-datatype-library (klacks:list-attributes source)
492 (case (find-symbol (klacks:current-lname source) :keyword)
493 (:|name|
494 (list :name (string-trim *whitespace* (parse-characters source))))
495 (:|anyName|
496 (cons :any (p/except-name-class? source)))
497 (:|nsName|
498 (cons :ns (p/except-name-class? source)))
499 (:|choice|
500 (cons :choice (p/name-class* source)))
502 (skip-foreign source))))))
504 (defun p/name-class* (source)
505 (let ((results nil))
506 (loop
507 (case (klacks:peek-next source)
508 (:start-element (push (p/name-class source) results))
509 (:end-element (return))))
510 (nreverse results)))
512 (defun p/except-name-class? (source)
513 (loop
514 (multiple-value-bind (key lname)
515 (klacks:peek-next source)
516 (unless (eq key :start-element)
517 (return))
518 (when (string= (find-symbol lname :keyword) "except")
519 (return (p/except-name-class source)))
520 (skip-foreign source))))
522 (defun p/except-name-class (source)
523 (klacks:expecting-element (source "except")
524 (with-datatype-library (klacks:list-attributes source)
525 (cons :except (p/name-class source)))))
527 (defun escape-uri (string)
528 (with-output-to-string (out)
529 (loop for c across (cxml::rod-to-utf8-string string) do
530 (let ((code (char-code c)))
531 ;; http://www.w3.org/TR/xlink/#link-locators
532 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
533 (format out "%~2,'0X" code)
534 (write-char c out))))))
537 ;;;; simplification
539 ;;; 4.1 Annotations
540 ;;; Foreign attributes and elements are removed implicitly while parsing.
542 ;;; 4.2 Whitespace
543 ;;; All character data is discarded while parsing (which can only be
544 ;;; whitespace after validation).
546 ;;; Whitespace in name, type, and combine attributes is stripped while
547 ;;; parsing. Ditto for <name/>.
549 ;;; 4.3. datatypeLibrary attribute
550 ;;; Escaping is done by p/pattern.
551 ;;; Attribute value defaulting is done using *datatype-library*; only
552 ;;; p/data and p/value record the computed value.
554 ;;; 4.4. type attribute of value element
555 ;;; Done by p/value.
557 ;;; 4.5. href attribute
558 ;;; Escaping is done by p/include and p/external-ref.
560 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
561 ;;; but that requires xstream hacking.
563 ;;; 4.6. externalRef element
564 ;;; Done by p/external-ref.
566 ;;; 4.7. include element
567 ;;; Done by p/include.
570 ;;;; tests
572 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
573 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
574 (let ((pass 0)
575 (total 0))
576 (dolist (d (directory p))
577 (let ((name (car (last (pathname-directory d)))))
578 (when (parse-integer name :junk-allowed t)
579 (incf total)
580 (when (test1 d)
581 (incf pass)))))
582 (format t "Passed ~D/~D tests.~%" pass total))
583 (dribble))
585 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
586 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
588 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
589 (let* ((*debug* t)
590 (d (merge-pathnames (format nil "~3,'0D/" n) p))
591 (i (merge-pathnames "i.rng" d))
592 (c (merge-pathnames "c.rng" d))
593 (rng (if (probe-file c) c i)))
594 (format t "~A: " (car (last (pathname-directory d))))
595 (print rng)
596 (parse-relax-ng rng)))
598 (defun test1 (d)
599 (let* ((i (merge-pathnames "i.rng" d))
600 (c (merge-pathnames "c.rng" d)))
601 (format t "~A: " (car (last (pathname-directory d))))
602 (if (probe-file c)
603 (handler-case
604 (progn
605 (parse-relax-ng c)
606 (format t " PASS~%")
608 (error (c)
609 (format t " FAIL: ~A~%" c)
610 nil))
611 (handler-case
612 (progn
613 (parse-relax-ng i)
614 (format t " FAIL: didn't detect invalid schema~%")
615 nil)
616 (rng-error (c)
617 (format t " PASS: ~S~%" (type-of c))
619 (error (c)
620 (format t " FAIL: incorrect condition type: ~A~%" c)
621 nil)))))