noch skip-foreign
[cxml-rng.git] / parse.lisp
blob8344ca0778fabe998a8cea6da5a70fdc30b2842b
1 (in-package :cxml-rng)
4 ;;;; Errors
6 (define-condition rng-error (simple-error) ())
8 (defun rng-error (source fmt &rest args)
9 (let ((s (make-string-output-stream)))
10 (apply #'format s fmt args)
11 (when source
12 (format s "~& [ Error at line ~D, column ~D in ~S ]"
13 (klacks:current-line-number source)
14 (klacks:current-column-number source)
15 (klacks:current-system-id source)))
16 (error 'rng-error
17 :format-control "~A"
18 :format-arguments (list (get-output-stream-string s)))))
21 ;;;; Parser
23 (defvar *datatype-library*)
24 (defvar *entity-resolver*)
25 (defvar *external-href-stack*)
26 (defvar *include-href-stack*)
28 (defvar *debug* nil)
30 (defun invoke-with-klacks-handler (fn source)
31 (if *debug*
32 (funcall fn)
33 (handler-case
34 (funcall fn)
35 (cxml:xml-parse-error (c)
36 (rng-error source "Cannot parse schema: ~A" c)))))
38 (defun parse-relax-ng (input &key entity-resolver)
39 (klacks:with-open-source (source (cxml:make-source input))
40 (invoke-with-klacks-handler
41 (lambda ()
42 (klacks:find-event source :start-element)
43 (let ((*datatype-library* "")
44 (*entity-resolver* entity-resolver)
45 (*external-href-stack* '())
46 (*include-href-stack* '()))
47 (p/pattern source)))
48 source)))
51 ;;;; pattern structures
53 (defstruct pattern
54 ns)
56 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
57 possibilities)
59 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
60 name)
62 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
63 children)
65 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
66 child)
68 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
69 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
70 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
71 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
72 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
73 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
74 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
75 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
77 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
79 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
81 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
82 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
84 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
85 datatype-library)
87 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
88 string)
90 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
91 type
92 params
93 except)
95 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
97 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
98 content)
101 ;;;; non-pattern
103 (defstruct param
104 name
105 string)
107 (defstruct start
108 combine
109 child)
111 (defstruct define
112 name
113 combine
114 children)
116 (defstruct div
117 content)
119 (defstruct include
120 href
121 content)
124 ;;;; parser
126 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
128 (defun skip-foreign* (source)
129 (loop
130 (case (klacks:peek-next source)
131 (:start-element (skip-foreign source))
132 (:end-element (return)))))
134 (defun skip-foreign (source)
135 (when (equal (klacks:current-uri source) *rng-namespace*)
136 (rng-error source
137 "invalid schema: ~A not allowed here"
138 (klacks:current-lname source)))
139 (klacks:serialize-element source nil))
141 (defun attribute (lname attrs)
142 (let ((a (sax:find-attribute-ns "" lname attrs)))
143 (if a
144 (sax:attribute-value a)
145 nil)))
147 (defvar *whitespace*
148 (format nil "~C~C~C"
149 (code-char 9)
150 (code-char 32)
151 (code-char 13)
152 (code-char 10)))
154 (defun ntc (lname attrs)
155 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
156 (let ((a (sax:find-attribute-ns "" lname attrs)))
157 (if a
158 (string-trim *whitespace* (sax:attribute-value a))
159 nil)))
161 (defmacro with-datatype-library (attrs &body body)
162 `(invoke-with-datatype-library (lambda () ,@body) ,attrs))
164 (defun invoke-with-datatype-library (fn attrs)
165 (let* ((dl (attribute "datatypeLibrary" attrs))
166 (*datatype-library* (if dl (escape-uri dl) *datatype-library*)))
167 (funcall fn)))
169 (defun p/pattern (source)
170 (let* ((lname (klacks:current-lname source))
171 (attrs (klacks:list-attributes source))
172 (ns (attribute "ns" attrs)))
173 (with-datatype-library attrs
174 (case (find-symbol lname :keyword)
175 (:|element| (p/element source (ntc "name" attrs) ns))
176 (:|attribute| (p/attribute source (ntc "name" attrs) ns))
177 (:|group| (p/combination #'make-group source ns))
178 (:|interleave| (p/combination #'make-interleave source ns))
179 (:|choice| (p/combination #'make-choice source ns))
180 (:|optional| (p/combination #'make-optional source ns))
181 (:|zeroOrMore| (p/combination #'make-zero-or-more source ns))
182 (:|oneOrMore| (p/combination #'make-one-or-more source ns))
183 (:|list| (p/combination #'make-list-pattern source ns))
184 (:|mixed| (p/combination #'make-mixed source ns))
185 (:|ref| (p/ref source ns))
186 (:|parentRef| (p/parent-ref source ns))
187 (:|empty| (p/empty source ns))
188 (:|text| (p/text source ns))
189 (:|value| (p/value source ns))
190 (:|data| (p/data source ns))
191 (:|externalRef| (p/external-ref source ns))
192 (:|grammar| (p/grammar source ns))
193 (t (skip-foreign source))))))
195 (defun p/pattern+ (source)
196 (let ((children nil))
197 (loop
198 (case (klacks:peek-next source)
199 (:start-element
200 (let ((p (p/pattern source))) (when p (push p children))))
201 (:end-element (return))))
202 (unless children
203 (rng-error source "empty element"))
204 (nreverse children)))
206 (defun p/pattern? (source)
207 (loop
208 (case (klacks:peek-next source)
209 (:start-element (return (p/pattern source)))
210 (:end-element (return)))))
212 (defun p/element (source name ns)
213 (klacks:expecting-element (source "element")
214 (let ((result (make-element :ns ns)))
215 (if name
216 (setf (pattern-name result) (list :name name))
217 (setf (pattern-name result) (p/name-class source)))
218 (setf (pattern-children result) (p/pattern+ source))
219 result)))
221 (defun p/attribute (source name ns)
222 (klacks:expecting-element (source "attribute")
223 (let ((result (make-attribute :ns ns)))
224 (if name
225 (setf (pattern-name result) (list :name name))
226 (setf (pattern-name result) (p/name-class source)))
227 (setf (pattern-child result) (p/pattern? source))
228 result)))
230 (defun p/combination (constructor source ns)
231 (klacks:expecting-element (source)
232 (let ((possibility (p/pattern+ source)))
233 (funcall constructor :possibility possibility :ns ns))))
235 (defun p/ref (source ns)
236 (klacks:expecting-element (source "ref")
237 (make-ref :name (ntc "name" (klacks:list-attributes source))
238 :ns ns)))
240 (defun p/parent-ref (source ns)
241 (klacks:expecting-element (source "parentRef")
242 (make-parent-ref :name (ntc "name" (klacks:list-attributes source))
243 :ns ns)))
245 (defun p/empty (source ns)
246 (klacks:expecting-element (source "empty")
247 (skip-foreign* source)
248 (make-empty :ns ns)))
250 (defun p/text (source ns)
251 (klacks:expecting-element (source "text")
252 (skip-foreign* source)
253 (make-text :ns ns)))
255 (defun parse-characters (source)
256 ;; fixme
257 (let ((tmp ""))
258 (loop
259 (multiple-value-bind (key data) (klacks:peek-next source)
260 (case key
261 (:characters
262 (setf tmp (concatenate 'string tmp data)))
263 (:end-element (return)))))
264 tmp))
266 (defun p/value (source ns)
267 (klacks:expecting-element (source "value")
268 (let* ((type (ntc "type" (klacks:list-attributes source)))
269 (string (parse-characters source))
270 (dl *datatype-library*))
271 (unless type
272 (setf type "token")
273 (setf dl ""))
274 (make-value :string string :type type :datatype-library dl :ns ns))))
276 (defun p/data (source ns)
277 (klacks:expecting-element (source "data")
278 (let* ((type (ntc "type" (klacks:list-attributes source)))
279 (result (make-data :type type
280 :datatype-library *datatype-library*
281 :ns ns))
282 (params '()))
283 (loop
284 (multiple-value-bind (key lname)
285 (klacks:peek-next source)
286 (case key
287 (:start-element
288 (case (find-symbol lname :keyword)
289 (:|param| (push (p/param source) params))
290 (:|except|
291 (setf (pattern-except result) (p/except-pattern source))
292 (return))
293 (t (skip-foreign source))))
294 (:end-element
295 (return)))))
296 (setf (pattern-params result) (nreverse params))
297 result)))
299 (defun p/param (source)
300 (klacks:expecting-element (source "param")
301 (let ((name (ntc "name" (klacks:list-attributes source)))
302 (string (parse-characters source)))
303 (make-param :name name :string string))))
305 (defun p/except-pattern (source)
306 (klacks:expecting-element (source "except")
307 (with-datatype-library (klacks:list-attributes source)
308 (p/pattern+ source))))
310 (defun p/not-allowed (source ns)
311 (klacks:expecting-element (source "notAllowed")
312 (make-not-allowed :ns ns)))
314 (defun safe-parse-uri (source str &optional base)
315 (when (zerop (length str))
316 (rng-error source "missing URI"))
317 (handler-case
318 (if base
319 (puri:merge-uris str base)
320 (puri:parse-uri str))
321 (puri:uri-parse-error ()
322 (rng-error source "invalid URI: ~A" str))))
324 (defun p/external-ref (source ns)
325 (klacks:expecting-element (source "externalRef")
326 (let ((href
327 (escape-uri (attribute "href" (klacks:list-attributes source))))
328 (base (klacks:current-xml-base source)))
329 (when (find href *include-href-stack* :test #'string=)
330 (rng-error source "looping include"))
331 (let* ((*include-href-stack* (cons href *include-href-stack*))
332 (uri (safe-parse-uri source href base))
333 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
334 (result
335 (klacks:with-open-source (source (cxml:make-source xstream))
336 (invoke-with-klacks-handler
337 (lambda ()
338 (klacks:find-event source :start-element)
339 (let ((*datatype-library* ""))
340 (p/pattern source)))
341 source))))
342 (unless (pattern-ns result)
343 (setf (pattern-ns result) ns))
344 (skip-foreign* source)
345 result))))
347 (defun p/grammar (source ns)
348 (klacks:expecting-element (source "grammar")
349 (make-grammar :content (p/grammar-content* source) :ns ns)))
351 (defun p/grammar-content* (source &key disallow-include)
352 (let ((content nil))
353 (loop
354 (multiple-value-bind (key lname) (klacks:peek-next source)
355 (case key
356 (:start-element
357 (with-datatype-library (klacks:list-attributes source)
358 (case (find-symbol lname :keyword)
359 (:|start| (push (p/start source) content))
360 (:|define| (push (p/define source) content))
361 (:|div| (push (p/div source) content))
362 (:|include|
363 (when disallow-include
364 (rng-error source "nested include not permitted"))
365 (push (p/include source) content))
366 (t (skip-foreign source)))))
367 (:end-element (return)))))
368 (nreverse content)))
370 (defun p/start (source)
371 (klacks:expecting-element (source "start")
372 (let ((combine (ntc "combine" source))
373 (child (p/pattern source)))
374 (make-start :combine (find-symbol (string-upcase combine) :keyword)
375 :child child))))
377 (defun p/define (source)
378 (klacks:expecting-element (source "define")
379 (let ((name (ntc "name" source))
380 (combine (ntc "combine" source))
381 (children (p/pattern+ source)))
382 (make-define :name name
383 :combine (find-symbol (string-upcase combine) :keyword)
384 :children children))))
386 (defun p/div (source)
387 (klacks:expecting-element (source "div")
388 (make-div :content (p/grammar-content* source))))
390 (defun p/include (source)
391 (klacks:expecting-element (source "include")
392 (let ((href
393 (escape-uri (attribute "href" (klacks:list-attributes source))))
394 (base (klacks:current-xml-base source))
395 (include-content (p/grammar-content* source :disallow-include t)))
396 (when (find href *include-href-stack* :test #'string=)
397 (rng-error source "looping include"))
398 (let* ((*include-href-stack* (cons href *include-href-stack*))
399 (uri (safe-parse-uri source href base))
400 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
401 (grammar
402 (klacks:with-open-source (source (cxml:make-source xstream))
403 (invoke-with-klacks-handler
404 (lambda ()
405 (klacks:find-event source :start-element)
406 (let ((*datatype-library* ""))
407 (p/grammar source "wrong://")))
408 source)))
409 (grammar-content (pattern-content grammar)))
410 (klacks:consume source)
411 (make-div :children
412 (cons (make-div :children
413 (simplify-include source
414 grammar-content
415 include-content))
416 include-content))))))
418 (defun simplify-include/map (fn l)
419 (loop
420 for x in l
421 for value = (let ((result (funcall fn x)))
422 (when (typep x 'div)
423 (loop
424 for x in (div-content x)
425 for value = (funcall fn x)
426 when value
427 collect value into content
428 finally
429 (setf (div-content x) content)))
430 result)
431 when value
432 collect value))
434 (defun simplify-include/start (source grammar-content include-content)
435 (let ((startp
436 (block nil
437 (simplify-include/map (lambda (x)
438 (when (typep x 'start)
439 (return t))
441 include-content))))
442 (if startp
443 (let ((ok nil))
444 (prog1
445 (simplify-include/map (lambda (x)
446 (cond
447 ((typep x 'start) (setf ok t) nil)
448 (t x)))
449 grammar-content))
450 (unless ok
451 (rng-error source "expected start in grammar")))
452 grammar-content)))
454 (defun simplify-include/define (source grammar-content include-content)
455 (let ((defines '()))
456 (simplify-include/map (lambda (x)
457 (when (typep x 'define)
458 (push (cons x nil) defines))
460 include-content)
461 (prog1
462 (simplify-include/map
463 (lambda (x)
464 (if (typep x 'define)
465 (let ((cons (find (define-name x) defines :key #'car)))
466 (cond
467 (cons
468 (setf (cdr cons) t)
469 nil)
471 x)))
473 grammar-content)
474 (loop for (define . okp) in defines do
475 (unless okp
476 (rng-error source "expected matching ~A in grammar" define))))))
478 (defun simplify-include (source grammar-content include-content)
479 (simplify-include/define
480 source
481 (simplify-include/start source grammar-content include-content)
482 include-content))
484 (defun p/name-class (source)
485 (klacks:expecting-element (source)
486 (with-datatype-library (klacks:list-attributes source)
487 (case (find-symbol (klacks:current-lname source) :keyword)
488 (:|name|
489 (list :name (string-trim *whitespace* (parse-characters source))))
490 (:|anyName|
491 (cons :any (p/except-name-class? source)))
492 (:|nsName|
493 (cons :ns (p/except-name-class? source)))
494 (:|choice|
495 (cons :choice (p/name-class* source)))
497 (skip-foreign source))))))
499 (defun p/name-class* (source)
500 (let ((results nil))
501 (loop
502 (case (klacks:peek-next source)
503 (:start-element (push (p/name-class source) results))
504 (:end-element (return))))
505 (nreverse results)))
507 (defun p/except-name-class? (source)
508 (loop
509 (multiple-value-bind (key lname)
510 (klacks:peek-next source)
511 (unless (eq key :start-element)
512 (return))
513 (when (string= (find-symbol lname :keyword) "except")
514 (return (p/except-name-class source)))
515 (skip-foreign source))))
517 (defun p/except-name-class (source)
518 (klacks:expecting-element (source "except")
519 (with-datatype-library (klacks:list-attributes source)
520 (cons :except (p/name-class source)))))
522 (defun escape-uri (string)
523 (with-output-to-string (out)
524 (loop for c across (cxml::rod-to-utf8-string string) do
525 (let ((code (char-code c)))
526 ;; http://www.w3.org/TR/xlink/#link-locators
527 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
528 (format out "%~2,'0X" code)
529 (write-char c out))))))
532 ;;;; simplification
534 ;;; 4.1 Annotations
535 ;;; Foreign attributes and elements are removed implicitly while parsing.
537 ;;; 4.2 Whitespace
538 ;;; All character data is discarded while parsing (which can only be
539 ;;; whitespace after validation).
541 ;;; Whitespace in name, type, and combine attributes is stripped while
542 ;;; parsing. Ditto for <name/>.
544 ;;; 4.3. datatypeLibrary attribute
545 ;;; Escaping is done by p/pattern.
546 ;;; Attribute value defaulting is done using *datatype-library*; only
547 ;;; p/data and p/value record the computed value.
549 ;;; 4.4. type attribute of value element
550 ;;; Done by p/value.
552 ;;; 4.5. href attribute
553 ;;; Escaping is done by p/include and p/external-ref.
555 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
556 ;;; but that requires xstream hacking.
558 ;;; 4.6. externalRef element
559 ;;; Done by p/external-ref.
561 ;;; 4.7. include element
562 ;;; Done by p/include.
565 ;;;; tests
567 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
568 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
569 (let ((pass 0)
570 (total 0))
571 (dolist (d (directory p))
572 (let ((name (car (last (pathname-directory d)))))
573 (when (parse-integer name :junk-allowed t)
574 (incf total)
575 (when (test1 d)
576 (incf pass)))))
577 (format t "Passed ~D/~D tests.~%" pass total))
578 (dribble))
580 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
581 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
583 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
584 (let* ((*debug* t)
585 (d (merge-pathnames (format nil "~3,'0D/" n) p))
586 (i (merge-pathnames "i.rng" d))
587 (c (merge-pathnames "c.rng" d))
588 (rng (if (probe-file c) c i)))
589 (format t "~A: " (car (last (pathname-directory d))))
590 (print rng)
591 (parse-relax-ng rng)))
593 (defun test1 (d)
594 (let* ((i (merge-pathnames "i.rng" d))
595 (c (merge-pathnames "c.rng" d)))
596 (format t "~A: " (car (last (pathname-directory d))))
597 (if (probe-file c)
598 (handler-case
599 (progn
600 (parse-relax-ng c)
601 (format t " PASS~%")
603 (error (c)
604 (format t " FAIL: ~A~%" c)
605 nil))
606 (handler-case
607 (progn
608 (parse-relax-ng i)
609 (format t " FAIL: didn't detect invalid schema~%")
610 nil)
611 (rng-error (c)
612 (format t " PASS: ~S~%" (type-of c))
614 (error (c)
615 (format t " FAIL: incorrect condition type: ~A~%" c)
616 nil)))))