4.4 & 4.5
[cxml-rng.git] / parse.lisp
blob2e0dee0e138f06a4dcf90cde702e4e7f4034e108
1 (in-package :cxml-rng)
4 (defvar *datatype-library*)
6 (defun parse-relax-ng (input &key entity-resolver)
7 (klacks:with-open-source (source (cxml:make-source input))
8 (klacks:find-event source :start-element)
9 (let ((*datatype-library* ""))
10 (p/pattern source))))
13 ;;;; pattern structures
15 (defstruct pattern
16 ns)
18 (defstruct (%combination (:include pattern) (:conc-name "PATTERN-"))
19 possibilities)
21 (defstruct (%named-pattern (:include pattern) (:conc-name "PATTERN-"))
22 name)
24 (defstruct (element (:include %named-pattern) (:conc-name "PATTERN-"))
25 children)
27 (defstruct (attribute (:include %named-pattern) (:conc-name "PATTERN-"))
28 child)
30 (defstruct (group (:include %combination) (:conc-name "PATTERN-")))
31 (defstruct (interleave (:include %combination) (:conc-name "PATTERN-")))
32 (defstruct (choice (:include %combination) (:conc-name "PATTERN-")))
33 (defstruct (optional (:include %combination) (:conc-name "PATTERN-")))
34 (defstruct (zero-or-more (:include %combination) (:conc-name "PATTERN-")))
35 (defstruct (one-or-more (:include %combination) (:conc-name "PATTERN-")))
36 (defstruct (list-pattern (:include %combination) (:conc-name "PATTERN-")))
37 (defstruct (mixed (:include %combination) (:conc-name "PATTERN-")))
39 (defstruct (ref (:include %named-pattern) (:conc-name "PATTERN-")))
41 (defstruct (parent-ref (:include %named-pattern) (:conc-name "PATTERN-")))
43 (defstruct (empty (:include pattern) (:conc-name "PATTERN-")))
44 (defstruct (text (:include pattern) (:conc-name "PATTERN-")))
46 (defstruct (%typed-pattern (:include pattern) (:conc-name "PATTERN-"))
47 datatype-library)
49 (defstruct (value (:include %typed-pattern) (:conc-name "PATTERN-"))
50 string)
52 (defstruct (data (:include %typed-pattern) (:conc-name "PATTERN-"))
53 type
54 params
55 except)
57 (defstruct (not-allowed (:include pattern) (:conc-name "PATTERN-")))
59 (defstruct (external-ref (:include pattern) (:conc-name "PATTERN-"))
60 href)
62 (defstruct (grammar (:include pattern) (:conc-name "PATTERN-"))
63 content)
66 ;;;; non-pattern
68 (defstruct param
69 name
70 string)
72 (defstruct start
73 combine
74 child)
76 (defstruct define
77 name
78 combine
79 children)
81 (defstruct div
82 content)
84 (defstruct include
85 href
86 content)
89 ;;;; parser
91 (defvar *rng-namespace* "http://relaxng.org/ns/structure/1.0")
93 (defun skip-foreign (source)
94 (print (klacks:current-lname source))
95 (assert (not (equal (klacks:current-uri source) *rng-namespace*)))
96 (klacks:serialize-element source nil))
98 (defun attribute (lname attrs)
99 (let ((a (sax:find-attribute-ns "" lname attrs)))
100 (if a
101 (sax:attribute-value a)
102 nil)))
104 (defvar *whitespace*
105 (format nil "~C~C~C"
106 (code-char 9)
107 (code-char 32)
108 (code-char 13)
109 (code-char 10)))
111 (defun ntc (lname attrs)
112 ;; used for (n)ame, (t)ype, and (c)ombine, this also strings whitespace
113 (let ((a (sax:find-attribute-ns "" lname attrs)))
114 (if a
115 (string-trim *whitespace* (sax:attribute-value a))
116 nil)))
118 (defmacro with-datatype-library (attrs &body body)
119 `(invoke-with-datatype-library (lambda () ,@body) attrs))
121 (defun invoke-with-datatype-library (fn attrs)
122 (let* ((dl (attribute "datatypeLibrary" attrs))
123 (*datatype-library* (if dl (escape-uri dl) *datatype-library*)))
124 (funcall fn)))
126 (defun p/pattern (source)
127 (let* ((lname (klacks:current-lname source))
128 (attrs (klacks:list-attributes source))
129 (ns (attribute "ns" attrs)))
130 (with-datatype-library attrs
131 (case (find-symbol lname :keyword)
132 (:|element| (p/element source (ntc "name" attrs) ns))
133 (:|attribute| (p/attribute source (ntc "name" attrs) ns))
134 (:|group| (p/combination #'make-group source ns))
135 (:|interleave| (p/combination #'make-interleave source ns))
136 (:|choice| (p/combination #'make-choice source ns))
137 (:|optional| (p/combination #'make-optional source ns))
138 (:|zeroOrMore| (p/combination #'make-zero-or-more source ns))
139 (:|oneOrMore| (p/combination #'make-one-or-more source ns))
140 (:|list| (p/combination #'make-list-pattern source ns))
141 (:|mixed| (p/combination #'make-mixed source ns))
142 (:|ref| (p/ref source ns))
143 (:|parentRef| (p/parent-ref source ns))
144 (:|empty| (p/empty source ns))
145 (:|text| (p/text source ns))
146 (:|value| (p/value source ns))
147 (:|data| (p/data source ns))
148 (:|externalRef| (p/external-ref source ns))
149 (:|grammar| (p/grammar source ns))
150 (t (skip-foreign source))))))
152 (defun p/pattern+ (source)
153 (let ((children nil))
154 (loop
155 (case (klacks:peek-next source)
156 (:start-element
157 (let ((p (p/pattern source))) (when p (push p children))))
158 (:end-element (return))))
159 (unless children
160 (error "empty element"))
161 (nreverse children)))
163 (defun p/pattern? (source)
164 (loop
165 (case (klacks:peek-next source)
166 (:start-element (return (p/pattern source)))
167 (:end-element (return)))))
169 (defun p/element (source name ns)
170 (klacks:expecting-element (source "element")
171 (let ((result (make-element :ns ns)))
172 (if name
173 (setf (pattern-name result) (list :name name))
174 (setf (pattern-name result) (p/name-class source)))
175 (setf (pattern-children result) (p/pattern+ source))
176 result)))
178 (defun p/attribute (source name ns)
179 (klacks:expecting-element (source "attribute")
180 (let ((result (make-attribute :ns ns)))
181 (if name
182 (setf (pattern-name result) (list :name name))
183 (setf (pattern-name result) (p/name-class source)))
184 (setf (pattern-child result) (p/pattern? source))
185 result)))
187 (defun p/combination (constructor source ns)
188 (klacks:expecting-element (source)
189 (let ((possibility (p/pattern+ source)))
190 (funcall constructor :possibility possibility :ns ns))))
192 (defun p/ref (source ns)
193 (klacks:expecting-element (source "ref")
194 (make-ref :name (ntc "name" (klacks:list-attributes source))
195 :ns ns)))
197 (defun p/parent-ref (source ns)
198 (klacks:expecting-element (source "parentRef")
199 (make-parent-ref :name (ntc "name" (klacks:list-attributes source))
200 :ns ns)))
202 (defun p/empty (source ns)
203 (klacks:expecting-element (source "empty")
204 (klacks:consume source)
205 (make-empty :ns ns)))
207 (defun p/text (source ns)
208 (klacks:expecting-element (source "text")
209 (klacks:consume source)
210 (make-text :ns ns)))
212 (defun parse-characters (source)
213 ;; fixme
214 (let ((tmp ""))
215 (loop
216 (multiple-value-bind (key data) (klacks:peek-next source)
217 (case key
218 (:characters
219 (setf tmp (concatenate 'string tmp data)))
220 (:end-element (return)))))
221 tmp))
223 (defun p/value (source ns)
224 (klacks:expecting-element (source "value")
225 (let* ((type (ntc "type" (klacks:list-attributes source)))
226 (string (parse-characters source))
227 (dl *datatype-library*))
228 (unless type
229 (setf type "token")
230 (setf dl ""))
231 (make-value :string string :type type :datatype-library dl :ns ns))))
233 (defun p/data (source ns)
234 (klacks:expecting-element (source "data")
235 (let* ((type (ntc "type" (klacks:list-attributes source)))
236 (result (make-data :type type
237 :datatype-library *datatype-library*
238 :ns ns))
239 (params '()))
240 (loop
241 (multiple-value-bind (key lname)
242 (klacks:peek-next source)
243 (case key
244 (:start-element
245 (case (find-symbol lname :keyword)
246 (:|param| (push (p/param source) params))
247 (:|except|
248 (setf (pattern-except result) (p/except-pattern source))
249 (return))
250 (t (skip-foreign source))))
251 (:end-element
252 (return)))))
253 (setf (pattern-params result) (nreverse params))
254 result)))
256 (defun p/param (source)
257 (klacks:expecting-element (source "param")
258 (let ((name (ntc "name" (klacks:list-attributes source)))
259 (string (parse-characters source)))
260 (make-param :name name :string string))))
262 (defun p/except-pattern (source)
263 (klacks:expecting-element (source "except")
264 (with-datatype-library (klacks:list-attributes source)
265 (p/pattern+ source))))
267 (defun p/not-allowed (source ns)
268 (klacks:expecting-element (source "notAllowed")
269 (make-not-allowed :ns ns)))
271 (defun p/external-ref (source ns)
272 (klacks:expecting-element (source "externalRef")
273 (make-external-ref
274 :href (escape-uri (attribute "href" (klacks:list-attributes source)))
275 :ns ns)))
277 (defun p/grammar (source ns)
278 (klacks:expecting-element (source "grammar")
279 (make-grammar :content (p/grammar-content* source) :ns ns)))
281 (defun p/grammar-content* (source &key disallow-include)
282 (let ((content nil))
283 (loop
284 (multiple-value-bind (key lname) (klacks:peek-next source)
285 (case key
286 (:start-element
287 (with-datatype-library (klacks:list-attributes source)
288 (case (find-symbol lname :keyword)
289 (:|start| (push (p/start source) content))
290 (:|define| (push (p/define source) content))
291 (:|div| (push (p/div source) content))
292 (:|include|
293 (when disallow-include
294 (error "nested include not permitted"))
295 (push (p/include source) content))
296 (t (skip-foreign source)))))
297 (:end-element (return)))))
298 (nreverse content)))
300 (defun p/start (source)
301 (klacks:expecting-element (source "start")
302 (let ((combine (ntc "combine" source))
303 (child (p/pattern source)))
304 (make-start :combine (find-symbol (string-upcase combine) :keyword)
305 :child child))))
307 (defun p/define (source)
308 (klacks:expecting-element (source "define")
309 (let ((name (ntc "name" source))
310 (combine (ntc "combine" source))
311 (children (p/pattern+ source)))
312 (make-define :name name
313 :combine (find-symbol (string-upcase combine) :keyword)
314 :children children))))
316 (defun p/div (source)
317 (klacks:expecting-element (source "div")
318 (make-div :content (p/grammar-content* source))))
320 (defun p/include (source)
321 (klacks:expecting-element (source "div")
322 (let ((href (escape-uri (attribute "href" source)))
323 (content (p/grammar-content* source :disallow-include t)))
324 (make-include :href href :content content))))
326 (defun p/name-class (source)
327 (klacks:expecting-element (source)
328 (with-datatype-library (klacks:list-attributes source)
329 (case (find-symbol (klacks:current-lname source) :keyword)
330 (:|name|
331 (list :name (string-trim *whitespace* (parse-characters source))))
332 (:|anyName|
333 (cons :any (p/except-name-class? source)))
334 (:|nsName|
335 (cons :ns (p/except-name-class? source)))
336 (:|choice|
337 (cons :choice (p/name-class* source)))
339 (skip-foreign source))))))
341 (defun p/name-class* (source)
342 (let ((results nil))
343 (loop
344 (case (klacks:peek-next source)
345 (:start-element (push (p/name-class source) results))
346 (:end-element (return))))
347 (nreverse results)))
349 (defun p/except-name-class? (source)
350 (loop
351 (multiple-value-bind (key lname)
352 (klacks:peek-next source)
353 (unless (eq key :start-element)
354 (return))
355 (when (string= (find-symbol lname :keyword) "except")
356 (return (p/except-name-class source)))
357 (skip-foreign source))))
359 (defun p/except-name-class (source)
360 (klacks:expecting-element (source "except")
361 (with-datatype-library (klacks:list-attributes source)
362 (cons :except (p/name-class source)))))
364 (defun escape-uri (string)
365 (with-output-to-string (out)
366 (loop for c across (cxml::rod-to-utf8-string string) do
367 (let ((code (char-code c)))
368 ;; http://www.w3.org/TR/xlink/#link-locators
369 (if (or (>= code 127) (<= code 32) (find c "<>\"{}|\\^`"))
370 (format out "%~2,'0X" code)
371 (write-char c out))))))
374 ;;;; simplification
376 ;;; 4.1 Annotations
377 ;;; Foreign attributes and elements are removed implicitly while parsing.
379 ;;; 4.2 Whitespace
380 ;;; All character data is discarded while parsing (which can only be
381 ;;; whitespace after validation).
383 ;;; Whitespace in name, type, and combine attributes is stripped while
384 ;;; parsing. Ditto for <name/>.
386 ;;; 4.3. datatypeLibrary attribute
387 ;;; Escaping is done by p/pattern.
388 ;;; Attribute value defaulting is done using *datatype-library*; only
389 ;;; p/data and p/value record the computed value.
391 ;;; 4.4. type attribute of value element
392 ;;; Done by p/value.
394 ;;; 4.5. href attribute
395 ;;; Escaping is done by p/include and p/external-ref.
397 ;;; FIXME: Mime-type handling should be the job of the entity resolver,
398 ;;; but that requires xstream hacking.