test list
[cxml-rng.git] / parse.lisp
blobf4cc2a3e0937cf00041d94d7475230deaee3dbc8
1 (in-package :cxml-rng)
4 ;;;; Errors
6 (define-condition rng-error (simple-error) ())
8 (defun rng-error (fmt &rest args)
9 (error 'rng-error :format-control fmt :format-arguments args))
12 ;;;; Parser
14 (defvar *datatype-library*)
15 (defvar *entity-resolver*)
16 (defvar *external-href-stack*)
17 (defvar *include-href-stack*)
19 (defun parse-relax-ng (input &key entity-resolver)
20 (handler-case
21 (klacks:with-open-source (source (cxml:make-source input))
22 (klacks:find-event source :start-element)
23 (let ((*datatype-library* "")
24 (*entity-resolver* entity-resolver)
25 (*external-href-stack* '())
26 (*include-href-stack* '()))
27 (p/pattern source)))
28 (cxml:xml-parse-error (c)
29 (rng-error "Cannot parse schema: ~A" c))))
32 ;;;; pattern structures
34 (defstruct pattern
35 ns)
37 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
38 possibilities)
40 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
41 name)
43 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
44 children)
46 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
47 child)
49 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
50 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
51 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
52 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
53 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
54 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
55 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
56 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
58 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
60 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
62 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
63 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
65 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
66 datatype-library)
68 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
69 string)
71 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
72 type
73 params
74 except)
76 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
78 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
79 content)
82 ;;;; non-pattern
84 (defstruct param
85 name
86 string)
88 (defstruct start
89 combine
90 child)
92 (defstruct define
93 name
94 combine
95 children)
97 (defstruct div
98 content)
100 (defstruct include
101 href
102 content)
105 ;;;; parser
107 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
109 (defun skip-foreign (source)
110 (when (equal (klacks:current-uri source) *rng-namespace*)
111 (rng-error "invalid schema: ~A not allowed here"
112 (klacks:current-lname source)))
113 (klacks:serialize-element source nil))
115 (defun attribute (lname attrs)
116 (let ((a (sax:find-attribute-ns "" lname attrs)))
117 (if a
118 (sax:attribute-value a)
119 nil)))
121 (defvar *whitespace*
122 (format nil "~C~C~C"
123 (code-char 9)
124 (code-char 32)
125 (code-char 13)
126 (code-char 10)))
128 (defun ntc (lname attrs)
129 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
130 (let ((a (sax:find-attribute-ns "" lname attrs)))
131 (if a
132 (string-trim *whitespace* (sax:attribute-value a))
133 nil)))
135 (defmacro with-datatype-library (attrs &body body)
136 `(invoke-with-datatype-library (lambda () ,@body) ,attrs))
138 (defun invoke-with-datatype-library (fn attrs)
139 (let* ((dl (attribute "datatypeLibrary" attrs))
140 (*datatype-library* (if dl (escape-uri dl) *datatype-library*)))
141 (funcall fn)))
143 (defun p/pattern (source)
144 (let* ((lname (klacks:current-lname source))
145 (attrs (klacks:list-attributes source))
146 (ns (attribute "ns" attrs)))
147 (with-datatype-library attrs
148 (case (find-symbol lname :keyword)
149 (:|element| (p/element source (ntc "name" attrs) ns))
150 (:|attribute| (p/attribute source (ntc "name" attrs) ns))
151 (:|group| (p/combination #'make-group source ns))
152 (:|interleave| (p/combination #'make-interleave source ns))
153 (:|choice| (p/combination #'make-choice source ns))
154 (:|optional| (p/combination #'make-optional source ns))
155 (:|zeroOrMore| (p/combination #'make-zero-or-more source ns))
156 (:|oneOrMore| (p/combination #'make-one-or-more source ns))
157 (:|list| (p/combination #'make-list-pattern source ns))
158 (:|mixed| (p/combination #'make-mixed source ns))
159 (:|ref| (p/ref source ns))
160 (:|parentRef| (p/parent-ref source ns))
161 (:|empty| (p/empty source ns))
162 (:|text| (p/text source ns))
163 (:|value| (p/value source ns))
164 (:|data| (p/data source ns))
165 (:|externalRef| (p/external-ref source ns))
166 (:|grammar| (p/grammar source ns))
167 (t (skip-foreign source))))))
169 (defun p/pattern+ (source)
170 (let ((children nil))
171 (loop
172 (case (klacks:peek-next source)
173 (:start-element
174 (let ((p (p/pattern source))) (when p (push p children))))
175 (:end-element (return))))
176 (unless children
177 (error "empty element"))
178 (nreverse children)))
180 (defun p/pattern? (source)
181 (loop
182 (case (klacks:peek-next source)
183 (:start-element (return (p/pattern source)))
184 (:end-element (return)))))
186 (defun p/element (source name ns)
187 (klacks:expecting-element (source "element")
188 (let ((result (make-element :ns ns)))
189 (if name
190 (setf (pattern-name result) (list :name name))
191 (setf (pattern-name result) (p/name-class source)))
192 (setf (pattern-children result) (p/pattern+ source))
193 result)))
195 (defun p/attribute (source name ns)
196 (klacks:expecting-element (source "attribute")
197 (let ((result (make-attribute :ns ns)))
198 (if name
199 (setf (pattern-name result) (list :name name))
200 (setf (pattern-name result) (p/name-class source)))
201 (setf (pattern-child result) (p/pattern? source))
202 result)))
204 (defun p/combination (constructor source ns)
205 (klacks:expecting-element (source)
206 (let ((possibility (p/pattern+ source)))
207 (funcall constructor :possibility possibility :ns ns))))
209 (defun p/ref (source ns)
210 (klacks:expecting-element (source "ref")
211 (make-ref :name (ntc "name" (klacks:list-attributes source))
212 :ns ns)))
214 (defun p/parent-ref (source ns)
215 (klacks:expecting-element (source "parentRef")
216 (make-parent-ref :name (ntc "name" (klacks:list-attributes source))
217 :ns ns)))
219 (defun p/empty (source ns)
220 (klacks:expecting-element (source "empty")
221 (klacks:consume source)
222 (make-empty :ns ns)))
224 (defun p/text (source ns)
225 (klacks:expecting-element (source "text")
226 (klacks:consume source)
227 (make-text :ns ns)))
229 (defun parse-characters (source)
230 ;; fixme
231 (let ((tmp ""))
232 (loop
233 (multiple-value-bind (key data) (klacks:peek-next source)
234 (case key
235 (:characters
236 (setf tmp (concatenate 'string tmp data)))
237 (:end-element (return)))))
238 tmp))
240 (defun p/value (source ns)
241 (klacks:expecting-element (source "value")
242 (let* ((type (ntc "type" (klacks:list-attributes source)))
243 (string (parse-characters source))
244 (dl *datatype-library*))
245 (unless type
246 (setf type "token")
247 (setf dl ""))
248 (make-value :string string :type type :datatype-library dl :ns ns))))
250 (defun p/data (source ns)
251 (klacks:expecting-element (source "data")
252 (let* ((type (ntc "type" (klacks:list-attributes source)))
253 (result (make-data :type type
254 :datatype-library *datatype-library*
255 :ns ns))
256 (params '()))
257 (loop
258 (multiple-value-bind (key lname)
259 (klacks:peek-next source)
260 (case key
261 (:start-element
262 (case (find-symbol lname :keyword)
263 (:|param| (push (p/param source) params))
264 (:|except|
265 (setf (pattern-except result) (p/except-pattern source))
266 (return))
267 (t (skip-foreign source))))
268 (:end-element
269 (return)))))
270 (setf (pattern-params result) (nreverse params))
271 result)))
273 (defun p/param (source)
274 (klacks:expecting-element (source "param")
275 (let ((name (ntc "name" (klacks:list-attributes source)))
276 (string (parse-characters source)))
277 (make-param :name name :string string))))
279 (defun p/except-pattern (source)
280 (klacks:expecting-element (source "except")
281 (with-datatype-library (klacks:list-attributes source)
282 (p/pattern+ source))))
284 (defun p/not-allowed (source ns)
285 (klacks:expecting-element (source "notAllowed")
286 (make-not-allowed :ns ns)))
288 (defun p/external-ref (source ns)
289 (klacks:expecting-element (source "externalRef")
290 (let ((href
291 (escape-uri (attribute "href" (klacks:list-attributes source)))))
292 (when (find href *include-href-stack* :test #'string=)
293 (error "looping include"))
294 (let* ((*include-href-stack* (cons href *include-href-stack*))
295 (xstream (cxml::xstream-open-extid* *entity-resolver* nil href))
296 (result
297 (klacks:with-open-source (source (cxml:make-source xstream))
298 (klacks:find-event source :start-element)
299 (let ((*datatype-library* ""))
300 (p/pattern source)))))
301 (unless (pattern-ns result)
302 (setf (pattern-ns result) ns))
303 result))))
305 (defun p/grammar (source ns)
306 (klacks:expecting-element (source "grammar")
307 (make-grammar :content (p/grammar-content* source) :ns ns)))
309 (defun p/grammar-content* (source &key disallow-include)
310 (let ((content nil))
311 (loop
312 (multiple-value-bind (key lname) (klacks:peek-next source)
313 (case key
314 (:start-element
315 (with-datatype-library (klacks:list-attributes source)
316 (case (find-symbol lname :keyword)
317 (:|start| (push (p/start source) content))
318 (:|define| (push (p/define source) content))
319 (:|div| (push (p/div source) content))
320 (:|include|
321 (when disallow-include
322 (error "nested include not permitted"))
323 (push (p/include source) content))
324 (t (skip-foreign source)))))
325 (:end-element (return)))))
326 (nreverse content)))
328 (defun p/start (source)
329 (klacks:expecting-element (source "start")
330 (let ((combine (ntc "combine" source))
331 (child (p/pattern source)))
332 (make-start :combine (find-symbol (string-upcase combine) :keyword)
333 :child child))))
335 (defun p/define (source)
336 (klacks:expecting-element (source "define")
337 (let ((name (ntc "name" source))
338 (combine (ntc "combine" source))
339 (children (p/pattern+ source)))
340 (make-define :name name
341 :combine (find-symbol (string-upcase combine) :keyword)
342 :children children))))
344 (defun p/div (source)
345 (klacks:expecting-element (source "div")
346 (make-div :content (p/grammar-content* source))))
348 (defun p/include (source)
349 (klacks:expecting-element (source "include")
350 (let ((href
351 (escape-uri (attribute "href" (klacks:list-attributes source))))
352 (include-content (p/grammar-content* source :disallow-include t)))
353 (when (find href *include-href-stack* :test #'string=)
354 (error "looping include"))
355 (let* ((*include-href-stack* (cons href *include-href-stack*))
356 (xstream (cxml::xstream-open-extid* *entity-resolver* nil href))
357 (grammar
358 (klacks:with-open-source (source (cxml:make-source xstream))
359 (klacks:find-event source :start-element)
360 (let ((*datatype-library* ""))
361 (p/grammar source "wrong://"))))
362 (grammar-content (pattern-content grammar)))
363 (make-div :children
364 (cons (make-div :children
365 (simplify-include grammar-content
366 include-content))
367 include-content))))))
369 (defun simplify-include/map (fn l)
370 (loop
371 for x in l
372 for value = (let ((result (funcall fn x)))
373 (when (typep x 'div)
374 (loop
375 for x in (div-content x)
376 for value = (funcall fn x)
377 when value
378 collect value into content
379 finally
380 (setf (div-content x) content)))
381 result)
382 when value
383 collect value))
385 (defun simplify-include/start (grammar-content include-content)
386 (let ((startp
387 (block nil
388 (simplify-include/map (lambda (x)
389 (when (typep x 'start)
390 (return t))
392 include-content))))
393 (if startp
394 (let ((ok nil))
395 (prog1
396 (simplify-include/map (lambda (x)
397 (cond
398 ((typep x 'start) (setf ok t) nil)
399 (t x)))
400 grammar-content))
401 (unless ok
402 (error "expected start in grammar")))
403 grammar-content)))
405 (defun simplify-include/define (grammar-content include-content)
406 (let ((defines '()))
407 (simplify-include/map (lambda (x)
408 (when (typep x 'define)
409 (push (cons x nil) defines))
411 include-content)
412 (prog1
413 (simplify-include/map
414 (lambda (x)
415 (if (typep x 'define)
416 (let ((cons (find (define-name x) defines :key #'car)))
417 (cond
418 (cons
419 (setf (cdr cons) t)
420 nil)
422 x)))
424 grammar-content)
425 (loop for (define . okp) in defines do
426 (unless okp
427 (error "expected matching ~A in grammar" define))))))
429 (defun simplify-include (grammar-content include-content)
430 (simplify-include/define
431 (simplify-include/start grammar-content include-content)
432 include-content))
434 (defun p/name-class (source)
435 (klacks:expecting-element (source)
436 (with-datatype-library (klacks:list-attributes source)
437 (case (find-symbol (klacks:current-lname source) :keyword)
438 (:|name|
439 (list :name (string-trim *whitespace* (parse-characters source))))
440 (:|anyName|
441 (cons :any (p/except-name-class? source)))
442 (:|nsName|
443 (cons :ns (p/except-name-class? source)))
444 (:|choice|
445 (cons :choice (p/name-class* source)))
447 (skip-foreign source))))))
449 (defun p/name-class* (source)
450 (let ((results nil))
451 (loop
452 (case (klacks:peek-next source)
453 (:start-element (push (p/name-class source) results))
454 (:end-element (return))))
455 (nreverse results)))
457 (defun p/except-name-class? (source)
458 (loop
459 (multiple-value-bind (key lname)
460 (klacks:peek-next source)
461 (unless (eq key :start-element)
462 (return))
463 (when (string= (find-symbol lname :keyword) "except")
464 (return (p/except-name-class source)))
465 (skip-foreign source))))
467 (defun p/except-name-class (source)
468 (klacks:expecting-element (source "except")
469 (with-datatype-library (klacks:list-attributes source)
470 (cons :except (p/name-class source)))))
472 (defun escape-uri (string)
473 (with-output-to-string (out)
474 (loop for c across (cxml::rod-to-utf8-string string) do
475 (let ((code (char-code c)))
476 ;; http://www.w3.org/TR/xlink/#link-locators
477 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
478 (format out "%~2,'0X" code)
479 (write-char c out))))))
482 ;;;; simplification
484 ;;; 4.1 Annotations
485 ;;; Foreign attributes and elements are removed implicitly while parsing.
487 ;;; 4.2 Whitespace
488 ;;; All character data is discarded while parsing (which can only be
489 ;;; whitespace after validation).
491 ;;; Whitespace in name, type, and combine attributes is stripped while
492 ;;; parsing. Ditto for <name/>.
494 ;;; 4.3. datatypeLibrary attribute
495 ;;; Escaping is done by p/pattern.
496 ;;; Attribute value defaulting is done using *datatype-library*; only
497 ;;; p/data and p/value record the computed value.
499 ;;; 4.4. type attribute of value element
500 ;;; Done by p/value.
502 ;;; 4.5. href attribute
503 ;;; Escaping is done by p/include and p/external-ref.
505 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
506 ;;; but that requires xstream hacking.
508 ;;; 4.6. externalRef element
509 ;;; Done by p/external-ref.
511 ;;; 4.7. include element
512 ;;; Done by p/include.
515 ;;;; tests
517 (defun test (&optional (p "/home/david/src/lisp/cxml-rng/spec-split/*"))
518 (dribble "/home/david/src/lisp/cxml-rng/TEST")
519 (let ((pass 0)
520 (total 0))
521 (dolist (d (directory p))
522 (let ((name (car (last (pathname-directory d)))))
523 (when (parse-integer name :junk-allowed t)
524 (incf total)
525 (let* ((i (merge-pathnames "i.rng" d))
526 (c (merge-pathnames "c.rng" d)))
527 (format t "~A: " name)
528 (if (probe-file c)
529 (handler-case
530 (progn
531 (parse-relax-ng c)
532 (format t " PASS~%")
533 (incf pass))
534 (error (c)
535 (format t " FAIL: ~A~%" c)))
536 (handler-case
537 (progn
538 (parse-relax-ng i)
539 (format t " FAIL: didn't detect invalid schema~%"))
540 (rng-error (c)
541 (format t " PASS: ~A~%" c)
542 (incf pass))
543 (error (c)
544 (format t " FAIL: incorrect condition type: ~A~%" c))))))))
545 (format t "Passed ~D/~D tests.~%" pass total))
546 (dribble))