bei pass erstmal keinen fehler ausgeben
[cxml-rng.git] / parse.lisp
blob6d236acd87f696eb9b17aebf9f6c6c61f9eb5ea5
1 (in-package :cxml-rng)
4 ;;;; Errors
6 (define-condition rng-error (simple-error) ())
8 (defun rng-error (source fmt &rest args)
9 (let ((s (make-string-output-stream)))
10 (apply #'format s fmt args)
11 (when source
12 (format s "~& [ Error at line ~D, column ~D in ~S ]"
13 (klacks:current-line-number source)
14 (klacks:current-column-number source)
15 (klacks:current-system-id source)))
16 (error 'rng-error
17 :format-control "~A"
18 :format-arguments (list (get-output-stream-string s)))))
21 ;;;; Parser
23 (defvar *datatype-library*)
24 (defvar *entity-resolver*)
25 (defvar *external-href-stack*)
26 (defvar *include-href-stack*)
28 (defun invoke-with-klacks-handler (fn source)
29 (handler-case
30 (funcall fn)
31 (cxml:xml-parse-error (c)
32 (rng-error source "Cannot parse schema: ~A" c))))
34 (defun parse-relax-ng (input &key entity-resolver)
35 (klacks:with-open-source (source (cxml:make-source input))
36 (invoke-with-klacks-handler
37 (lambda ()
38 (klacks:find-event source :start-element)
39 (let ((*datatype-library* "")
40 (*entity-resolver* entity-resolver)
41 (*external-href-stack* '())
42 (*include-href-stack* '()))
43 (p/pattern source)))
44 source)))
47 ;;;; pattern structures
49 (defstruct pattern
50 ns)
52 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
53 possibilities)
55 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
56 name)
58 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
59 children)
61 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
62 child)
64 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
65 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
66 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
67 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
68 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
69 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
70 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
71 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
73 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
75 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
77 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
78 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
80 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
81 datatype-library)
83 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
84 string)
86 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
87 type
88 params
89 except)
91 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
93 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
94 content)
97 ;;;; non-pattern
99 (defstruct param
100 name
101 string)
103 (defstruct start
104 combine
105 child)
107 (defstruct define
108 name
109 combine
110 children)
112 (defstruct div
113 content)
115 (defstruct include
116 href
117 content)
120 ;;;; parser
122 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
124 (defun skip-foreign (source)
125 (when (equal (klacks:current-uri source) *rng-namespace*)
126 (rng-error source
127 "invalid schema: ~A not allowed here"
128 (klacks:current-lname source)))
129 (klacks:serialize-element source nil))
131 (defun attribute (lname attrs)
132 (let ((a (sax:find-attribute-ns "" lname attrs)))
133 (if a
134 (sax:attribute-value a)
135 nil)))
137 (defvar *whitespace*
138 (format nil "~C~C~C"
139 (code-char 9)
140 (code-char 32)
141 (code-char 13)
142 (code-char 10)))
144 (defun ntc (lname attrs)
145 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
146 (let ((a (sax:find-attribute-ns "" lname attrs)))
147 (if a
148 (string-trim *whitespace* (sax:attribute-value a))
149 nil)))
151 (defmacro with-datatype-library (attrs &body body)
152 `(invoke-with-datatype-library (lambda () ,@body) ,attrs))
154 (defun invoke-with-datatype-library (fn attrs)
155 (let* ((dl (attribute "datatypeLibrary" attrs))
156 (*datatype-library* (if dl (escape-uri dl) *datatype-library*)))
157 (funcall fn)))
159 (defun p/pattern (source)
160 (let* ((lname (klacks:current-lname source))
161 (attrs (klacks:list-attributes source))
162 (ns (attribute "ns" attrs)))
163 (with-datatype-library attrs
164 (case (find-symbol lname :keyword)
165 (:|element| (p/element source (ntc "name" attrs) ns))
166 (:|attribute| (p/attribute source (ntc "name" attrs) ns))
167 (:|group| (p/combination #'make-group source ns))
168 (:|interleave| (p/combination #'make-interleave source ns))
169 (:|choice| (p/combination #'make-choice source ns))
170 (:|optional| (p/combination #'make-optional source ns))
171 (:|zeroOrMore| (p/combination #'make-zero-or-more source ns))
172 (:|oneOrMore| (p/combination #'make-one-or-more source ns))
173 (:|list| (p/combination #'make-list-pattern source ns))
174 (:|mixed| (p/combination #'make-mixed source ns))
175 (:|ref| (p/ref source ns))
176 (:|parentRef| (p/parent-ref source ns))
177 (:|empty| (p/empty source ns))
178 (:|text| (p/text source ns))
179 (:|value| (p/value source ns))
180 (:|data| (p/data source ns))
181 (:|externalRef| (p/external-ref source ns))
182 (:|grammar| (p/grammar source ns))
183 (t (skip-foreign source))))))
185 (defun p/pattern+ (source)
186 (let ((children nil))
187 (loop
188 (case (klacks:peek-next source)
189 (:start-element
190 (let ((p (p/pattern source))) (when p (push p children))))
191 (:end-element (return))))
192 (unless children
193 (rng-error source "empty element"))
194 (nreverse children)))
196 (defun p/pattern? (source)
197 (loop
198 (case (klacks:peek-next source)
199 (:start-element (return (p/pattern source)))
200 (:end-element (return)))))
202 (defun p/element (source name ns)
203 (klacks:expecting-element (source "element")
204 (let ((result (make-element :ns ns)))
205 (if name
206 (setf (pattern-name result) (list :name name))
207 (setf (pattern-name result) (p/name-class source)))
208 (setf (pattern-children result) (p/pattern+ source))
209 result)))
211 (defun p/attribute (source name ns)
212 (klacks:expecting-element (source "attribute")
213 (let ((result (make-attribute :ns ns)))
214 (if name
215 (setf (pattern-name result) (list :name name))
216 (setf (pattern-name result) (p/name-class source)))
217 (setf (pattern-child result) (p/pattern? source))
218 result)))
220 (defun p/combination (constructor source ns)
221 (klacks:expecting-element (source)
222 (let ((possibility (p/pattern+ source)))
223 (funcall constructor :possibility possibility :ns ns))))
225 (defun p/ref (source ns)
226 (klacks:expecting-element (source "ref")
227 (make-ref :name (ntc "name" (klacks:list-attributes source))
228 :ns ns)))
230 (defun p/parent-ref (source ns)
231 (klacks:expecting-element (source "parentRef")
232 (make-parent-ref :name (ntc "name" (klacks:list-attributes source))
233 :ns ns)))
235 (defun p/empty (source ns)
236 (klacks:expecting-element (source "empty")
237 (klacks:consume source)
238 (make-empty :ns ns)))
240 (defun p/text (source ns)
241 (klacks:expecting-element (source "text")
242 (klacks:consume source)
243 (make-text :ns ns)))
245 (defun parse-characters (source)
246 ;; fixme
247 (let ((tmp ""))
248 (loop
249 (multiple-value-bind (key data) (klacks:peek-next source)
250 (case key
251 (:characters
252 (setf tmp (concatenate 'string tmp data)))
253 (:end-element (return)))))
254 tmp))
256 (defun p/value (source ns)
257 (klacks:expecting-element (source "value")
258 (let* ((type (ntc "type" (klacks:list-attributes source)))
259 (string (parse-characters source))
260 (dl *datatype-library*))
261 (unless type
262 (setf type "token")
263 (setf dl ""))
264 (make-value :string string :type type :datatype-library dl :ns ns))))
266 (defun p/data (source ns)
267 (klacks:expecting-element (source "data")
268 (let* ((type (ntc "type" (klacks:list-attributes source)))
269 (result (make-data :type type
270 :datatype-library *datatype-library*
271 :ns ns))
272 (params '()))
273 (loop
274 (multiple-value-bind (key lname)
275 (klacks:peek-next source)
276 (case key
277 (:start-element
278 (case (find-symbol lname :keyword)
279 (:|param| (push (p/param source) params))
280 (:|except|
281 (setf (pattern-except result) (p/except-pattern source))
282 (return))
283 (t (skip-foreign source))))
284 (:end-element
285 (return)))))
286 (setf (pattern-params result) (nreverse params))
287 result)))
289 (defun p/param (source)
290 (klacks:expecting-element (source "param")
291 (let ((name (ntc "name" (klacks:list-attributes source)))
292 (string (parse-characters source)))
293 (make-param :name name :string string))))
295 (defun p/except-pattern (source)
296 (klacks:expecting-element (source "except")
297 (with-datatype-library (klacks:list-attributes source)
298 (p/pattern+ source))))
300 (defun p/not-allowed (source ns)
301 (klacks:expecting-element (source "notAllowed")
302 (make-not-allowed :ns ns)))
304 (defun safe-parse-uri (source str &optional base)
305 (when (zerop (length str))
306 (rng-error source "missing URI"))
307 (handler-case
308 (if base
309 (puri:merge-uris str base)
310 (puri:parse-uri str))
311 (puri:uri-parse-error ()
312 (rng-error source "invalid URI: ~A" str))))
314 (defun p/external-ref (source ns)
315 (klacks:expecting-element (source "externalRef")
316 (let ((href
317 (escape-uri (attribute "href" (klacks:list-attributes source))))
318 (base (klacks:current-xml-base source)))
319 (when (find href *include-href-stack* :test #'string=)
320 (rng-error source "looping include"))
321 (let* ((*include-href-stack* (cons href *include-href-stack*))
322 (uri (safe-parse-uri source href base))
323 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
324 (result
325 (klacks:with-open-source (source (cxml:make-source xstream))
326 (invoke-with-klacks-handler
327 (lambda ()
328 (klacks:find-event source :start-element)
329 (let ((*datatype-library* ""))
330 (p/pattern source)))
331 source))))
332 (unless (pattern-ns result)
333 (setf (pattern-ns result) ns))
334 (klacks:consume source)
335 result))))
337 (defun p/grammar (source ns)
338 (klacks:expecting-element (source "grammar")
339 (make-grammar :content (p/grammar-content* source) :ns ns)))
341 (defun p/grammar-content* (source &key disallow-include)
342 (let ((content nil))
343 (loop
344 (multiple-value-bind (key lname) (klacks:peek-next source)
345 (case key
346 (:start-element
347 (with-datatype-library (klacks:list-attributes source)
348 (case (find-symbol lname :keyword)
349 (:|start| (push (p/start source) content))
350 (:|define| (push (p/define source) content))
351 (:|div| (push (p/div source) content))
352 (:|include|
353 (when disallow-include
354 (rng-error source "nested include not permitted"))
355 (push (p/include source) content))
356 (t (skip-foreign source)))))
357 (:end-element (return)))))
358 (nreverse content)))
360 (defun p/start (source)
361 (klacks:expecting-element (source "start")
362 (let ((combine (ntc "combine" source))
363 (child (p/pattern source)))
364 (make-start :combine (find-symbol (string-upcase combine) :keyword)
365 :child child))))
367 (defun p/define (source)
368 (klacks:expecting-element (source "define")
369 (let ((name (ntc "name" source))
370 (combine (ntc "combine" source))
371 (children (p/pattern+ source)))
372 (make-define :name name
373 :combine (find-symbol (string-upcase combine) :keyword)
374 :children children))))
376 (defun p/div (source)
377 (klacks:expecting-element (source "div")
378 (make-div :content (p/grammar-content* source))))
380 (defun p/include (source)
381 (klacks:expecting-element (source "include")
382 (let ((href
383 (escape-uri (attribute "href" (klacks:list-attributes source))))
384 (base (klacks:current-xml-base source))
385 (include-content (p/grammar-content* source :disallow-include t)))
386 (when (find href *include-href-stack* :test #'string=)
387 (rng-error source "looping include"))
388 (let* ((*include-href-stack* (cons href *include-href-stack*))
389 (uri (safe-parse-uri source href base))
390 (xstream (cxml::xstream-open-extid* *entity-resolver* nil uri))
391 (grammar
392 (klacks:with-open-source (source (cxml:make-source xstream))
393 (invoke-with-klacks-handler
394 (lambda ()
395 (klacks:find-event source :start-element)
396 (let ((*datatype-library* ""))
397 (p/grammar source "wrong://")))
398 source)))
399 (grammar-content (pattern-content grammar)))
400 (klacks:consume source)
401 (make-div :children
402 (cons (make-div :children
403 (simplify-include source
404 grammar-content
405 include-content))
406 include-content))))))
408 (defun simplify-include/map (fn l)
409 (loop
410 for x in l
411 for value = (let ((result (funcall fn x)))
412 (when (typep x 'div)
413 (loop
414 for x in (div-content x)
415 for value = (funcall fn x)
416 when value
417 collect value into content
418 finally
419 (setf (div-content x) content)))
420 result)
421 when value
422 collect value))
424 (defun simplify-include/start (source grammar-content include-content)
425 (let ((startp
426 (block nil
427 (simplify-include/map (lambda (x)
428 (when (typep x 'start)
429 (return t))
431 include-content))))
432 (if startp
433 (let ((ok nil))
434 (prog1
435 (simplify-include/map (lambda (x)
436 (cond
437 ((typep x 'start) (setf ok t) nil)
438 (t x)))
439 grammar-content))
440 (unless ok
441 (rng-error source "expected start in grammar")))
442 grammar-content)))
444 (defun simplify-include/define (source grammar-content include-content)
445 (let ((defines '()))
446 (simplify-include/map (lambda (x)
447 (when (typep x 'define)
448 (push (cons x nil) defines))
450 include-content)
451 (prog1
452 (simplify-include/map
453 (lambda (x)
454 (if (typep x 'define)
455 (let ((cons (find (define-name x) defines :key #'car)))
456 (cond
457 (cons
458 (setf (cdr cons) t)
459 nil)
461 x)))
463 grammar-content)
464 (loop for (define . okp) in defines do
465 (unless okp
466 (rng-error source "expected matching ~A in grammar" define))))))
468 (defun simplify-include (source grammar-content include-content)
469 (simplify-include/define
470 source
471 (simplify-include/start source grammar-content include-content)
472 include-content))
474 (defun p/name-class (source)
475 (klacks:expecting-element (source)
476 (with-datatype-library (klacks:list-attributes source)
477 (case (find-symbol (klacks:current-lname source) :keyword)
478 (:|name|
479 (list :name (string-trim *whitespace* (parse-characters source))))
480 (:|anyName|
481 (cons :any (p/except-name-class? source)))
482 (:|nsName|
483 (cons :ns (p/except-name-class? source)))
484 (:|choice|
485 (cons :choice (p/name-class* source)))
487 (skip-foreign source))))))
489 (defun p/name-class* (source)
490 (let ((results nil))
491 (loop
492 (case (klacks:peek-next source)
493 (:start-element (push (p/name-class source) results))
494 (:end-element (return))))
495 (nreverse results)))
497 (defun p/except-name-class? (source)
498 (loop
499 (multiple-value-bind (key lname)
500 (klacks:peek-next source)
501 (unless (eq key :start-element)
502 (return))
503 (when (string= (find-symbol lname :keyword) "except")
504 (return (p/except-name-class source)))
505 (skip-foreign source))))
507 (defun p/except-name-class (source)
508 (klacks:expecting-element (source "except")
509 (with-datatype-library (klacks:list-attributes source)
510 (cons :except (p/name-class source)))))
512 (defun escape-uri (string)
513 (with-output-to-string (out)
514 (loop for c across (cxml::rod-to-utf8-string string) do
515 (let ((code (char-code c)))
516 ;; http://www.w3.org/TR/xlink/#link-locators
517 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
518 (format out "%~2,'0X" code)
519 (write-char c out))))))
522 ;;;; simplification
524 ;;; 4.1 Annotations
525 ;;; Foreign attributes and elements are removed implicitly while parsing.
527 ;;; 4.2 Whitespace
528 ;;; All character data is discarded while parsing (which can only be
529 ;;; whitespace after validation).
531 ;;; Whitespace in name, type, and combine attributes is stripped while
532 ;;; parsing. Ditto for <name/>.
534 ;;; 4.3. datatypeLibrary attribute
535 ;;; Escaping is done by p/pattern.
536 ;;; Attribute value defaulting is done using *datatype-library*; only
537 ;;; p/data and p/value record the computed value.
539 ;;; 4.4. type attribute of value element
540 ;;; Done by p/value.
542 ;;; 4.5. href attribute
543 ;;; Escaping is done by p/include and p/external-ref.
545 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
546 ;;; but that requires xstream hacking.
548 ;;; 4.6. externalRef element
549 ;;; Done by p/external-ref.
551 ;;; 4.7. include element
552 ;;; Done by p/include.
555 ;;;; tests
557 (defun run-tests (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
558 (dribble "/home/david/src/lisp/cxml-rng/TEST" :if-exists :rename-and-delete)
559 (let ((pass 0)
560 (total 0))
561 (dolist (d (directory p))
562 (let ((name (car (last (pathname-directory d)))))
563 (when (parse-integer name :junk-allowed t)
564 (incf total)
565 (when (test1 d)
566 (incf pass)))))
567 (format t "Passed ~D/~D tests.~%" pass total))
568 (dribble))
570 (defun run-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
571 (test1 (merge-pathnames (format nil "~3,'0D/" n) p)))
573 (defun parse-test (n &optional (p "/home/david/src/lisp/cxml-rng/spec-split/"))
574 (let* ((d (merge-pathnames (format nil "~3,'0D/" n) p))
575 (i (merge-pathnames "i.rng" d))
576 (c (merge-pathnames "c.rng" d))
577 (rng (if (probe-file c) c i)))
578 (format t "~A: " (car (last (pathname-directory d))))
579 (print rng)
580 (parse-relax-ng rng)))
582 (defun test1 (d)
583 (let* ((i (merge-pathnames "i.rng" d))
584 (c (merge-pathnames "c.rng" d)))
585 (format t "~A: " (car (last (pathname-directory d))))
586 (if (probe-file c)
587 (handler-case
588 (progn
589 (parse-relax-ng c)
590 (format t " PASS~%")
592 (error (c)
593 (format t " FAIL: ~A~%" c)
594 nil))
595 (handler-case
596 (progn
597 (parse-relax-ng i)
598 (format t " FAIL: didn't detect invalid schema~%")
599 nil)
600 (rng-error (c)
601 (format t " PASS: ~S~%" c)
603 (error (c)
604 (format t " FAIL: incorrect condition type: ~A~%" c)
605 nil)))))