non-element\'-cache
[cxml-rng.git] / types.lisp
blobb9e1950396db8c3d1330bb784482f82b6c60a971
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
2 ;;;
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8 ;;;
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11 ;;;
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 (in-package :cxml-types)
31 (defclass data-type () ()
32 (:documentation
33 "@short{The abstract superclass of all types.}
35 Each type belongs to a datatype library, named by a keyword. In each
36 library, the types are named by strings.
38 @see-constructor{find-type}
39 @see-slot{type-name}
40 @see-slot{type-library}
41 @see-slot{type-context-dependent-p}
42 @see{parse}
43 @see{equal-using-type}
44 @see{validp}"))
46 (defgeneric find-type (library name &key &allow-other-keys)
47 (:documentation
48 "@arg[library]{datatype library, a keyword symbol}
49 @arg[name]{the type's name, a string}
50 @arg[args]{type parameters, strings named by keyword arguments}
51 @return{an instance of @class{data-type}, or @code{nil}}
52 @short{Look up the type named @em{name} in datatype library @em{library}.}
54 Return a type instance for this type and the additional parameters,
55 or @code{nil} if the type does not exist.
57 Additional parameters (knows as facets in XSD) can be passed to specify
58 or restrict the type for the purposes of @fun{validp}.
60 @see{data-type}"))
62 (defgeneric type-library (type)
63 (:documentation
64 "@arg[type]{an instance of @class{data-type}}
65 @return{library name, a keyword}
66 @short{Return the name of the library this type belongs to.}
68 @see{type-name}
69 @see{type-context-dependent-p}"))
71 (defgeneric type-name (type)
72 (:documentation
73 "@arg[type]{an instance of @class{data-type}}
74 @return{type name, a string}
75 @short{Return the name this type has within its library.}
77 @see{type-library}
78 @see{type-context-dependent-p}"))
80 (defmethod find-type ((library t) name &key &allow-other-keys)
81 (declare (ignore name))
82 nil)
84 (defgeneric type-context-dependent-p (type)
85 (:documentation
86 "@arg[type]{an instance of @class{data-type}}
87 @return{a boolean}
88 @short{Return true if parsing and validation of values by this type
89 depends on the validation context.}
91 In this case, the optional @code{context} argument to @fun{parse} and
92 @fun{validp} is required, and an error will be signalled if it is missing.
94 @see{validation-context}
95 @see{type-name}
96 @see{type-library}
97 @see{type-context-dependent-p}"))
99 (defmethod type-context-dependent-p ((type data-type))
100 nil)
102 (defgeneric equal-using-type (type u v)
103 (:documentation
104 "@arg[type]{an instance of @class{data-type}}
105 @arg[u]{a parsed value as returned by @fun{parse}}
106 @arg[v]{a parsed value as returned by @fun{parse}}
107 @return{a boolean}
108 @short{Compare the @emph{values} @code{u} and @code{v} using a
109 data-type-dependent equality function.}
111 @see{validp}"))
113 (defgeneric parse (type e &optional context)
114 (:documentation
115 "@arg[type]{an instance of @class{data-type}}
116 @arg[e]{a string}
117 @arg[context]{an instance of @class{validation-context}}
118 @return{an object}
119 @short{Parse string @code{e} and return a representation of its value
120 as defined by the data type.}
122 The @code{context} argument is required if @fun{type-context-dependent-p}
123 is true for @code{type}, and will be ignored otherwise.
125 @see{equal-using-type}
126 @see{validp}"))
128 (defgeneric validp (type e &optional context)
129 (:documentation
130 "@arg[type]{an instance of @class{data-type}}
131 @arg[e]{a string}
132 @arg[context]{an instance of @class{validation-context}}
133 @return{a boolean}
134 @short{Determine whether a string is a valid lexical representation
135 for a type.}
137 The @code{context} argument is required if @fun{type-context-dependent-p}
138 is true for @code{type}, and will be ignored otherwise.
140 @see{parse}
141 @see{equal-using-type}"))
144 ;;; Validation context
146 (defclass validation-context () ()
147 (:documentation
148 "@short{This abstract class defines a protocol allowing data types
149 to query the XML parser about its current state.}
151 Some types are context dependent, as indicated by
152 @fun{type-context-dependent-p}. Those types need access to state
153 computed by the XML parser implicitly, like namespace bindings or
154 the Base URI.
156 User-defined subclasses must implement a method
157 for the @fun{context-find-namespace-binding} function.
159 Two pre-defined validation context implementations are
160 provided, one for use with SAX, the other based on Klacks."))
162 (defgeneric context-find-namespace-binding (context prefix)
163 (:documentation
164 "@arg[context]{an instance of @class{validation-context}}
165 @arg[prefix]{name prefix, a string}
166 @return{the namespace URI as a string, or NIL}
167 @short{This function resolves a namespace prefix to a namespace URI in the
168 current context.}
169 All currently declared namespaces
170 are taken into account, including those declared directly on the
171 current element."))
173 (defclass klacks-validation-context (validation-context)
174 ((source :initarg :source :accessor context-source))
175 (:documentation
176 "A validation-context implementation that queries
177 a klacks source for information about the parser's current state.
178 @see-constructor{make-klacks-validation-context}"))
180 (defun make-klacks-validation-context (source)
181 "@arg[source]{a @a[http://common-lisp.net/project/cxml/klacks.html]{
182 klacks source}}
183 @return{a @class{klacks-validation-context}}
184 Create a validation-context that will query the given klacks source for
185 the current parser context."
186 (make-instance 'klacks-validation-context :source source))
188 (defmethod context-find-namespace-binding
189 ((context klacks-validation-context) prefix)
190 (klacks:find-namespace-binding prefix (context-source context)))
192 (defclass sax-validation-context-mixin (validation-context)
193 ((stack :initform nil :accessor context-stack))
194 (:documentation
195 "@short{A class that implements validation-context as a mixin for
196 user-defined SAX handler classes.}
198 The mixin will record namespace information
199 automatically, and the user's SAX handler can simply be passed as a
200 validation context to data type functions."))
202 (defmethod sax:start-prefix-mapping
203 ((handler sax-validation-context-mixin) prefix uri)
204 (push (cons prefix uri) (context-stack handler)))
206 (defmethod sax:end-prefix-mapping
207 ((handler sax-validation-context-mixin) prefix)
208 (setf (context-stack handler)
209 (remove prefix
210 (context-stack handler)
211 :count 1
212 :key #'car
213 :test #'equal)))
215 (defmethod context-find-namespace-binding
216 ((context sax-validation-context-mixin) prefix)
217 (cdr (assoc prefix (context-stack context) :test #'equal)))
220 ;;; Relax NG built-in type library
222 (defclass rng-type (data-type) ()
223 (:documentation
224 "@short{The class of Relax NG built-in types.}
225 Relax NG defines two built-in data type: string and token.
227 The Relax NG type library is named @code{:||}."))
229 (defclass string-type (rng-type) ()
230 (:documentation
231 "@short{The Relax NG 'string' type.}
232 This data type allows arbitrary strings and interprets them as-is.
234 For this type, @fun{parse} will return any string unchanged, and
235 @fun{equal-using-type} compares strings using @code{equal}."))
237 (defclass token-type (rng-type) ()
238 (:documentation
239 "@short{The Relax NG 'token' type.}
240 This data type allows arbitrary strings and normalizes all whitespaces.
242 For this type, @fun{parse} will return the string with leading and
243 trailing whitespace removed, and remaining sequences of spaces
244 compressed down to one space character each.
246 A method for @fun{equal-using-type} compares strings using @code{equal}."))
248 (defmethod type-library ((type rng-type))
249 :||)
251 (defvar *string-data-type* (make-instance 'string-type))
252 (defvar *token-data-type* (make-instance 'token-type))
254 (defmethod find-type ((library (eql :||)) name &rest args &key)
255 (cond
256 ((eq name :probe) t)
257 (args nil)
258 ((equal name "string") *string-data-type*)
259 ((equal name "token") *token-data-type*)
260 (t nil)))
262 (defmethod equal-using-type ((type rng-type) u v)
263 (equal u v))
265 (defmethod validp ((type rng-type) e &optional context)
266 (declare (ignore e context))
269 (defmethod type-name ((type string-type)) "string")
270 (defmethod type-name ((type token-type)) "token")
272 (defmethod parse ((type string-type) e &optional context)
273 (declare (ignore context))
276 (defmethod parse ((type token-type) e &optional context)
277 (declare (ignore context))
278 (normalize-whitespace e))
280 (eval-when (:compile-toplevel :load-toplevel :execute)
281 (defparameter *whitespace*
282 (format nil "~C~C~C~C"
283 (code-char 9)
284 (code-char 32)
285 (code-char 13)
286 (code-char 10))))
288 (defun normalize-whitespace (str)
289 (cl-ppcre:regex-replace-all #.(format nil "[~A]+" *whitespace*)
290 (string-trim *whitespace* str)
291 " "))
294 ;;; XML Schema Part 2: Datatypes Second Edition
296 (defclass xsd-type (data-type)
297 ((min-length :initarg :min-length :accessor min-length)
298 (max-length :initarg :max-length :accessor max-length)
299 (exact-length :initarg :exact-length :accessor exact-length))
300 (:documentation
301 "@short{The class of XML Schema built-in types.}
303 Subclasses of xsd-type provide the built-in types of
304 @a[http://www.w3.org/TR/xmlschema-2/]{
305 XML Schema Part 2: Datatypes Second Edition}
306 as specified in @a[http://relaxng.org/xsd-20010907.html]{Guidelines for
307 using W3C XML Schema Datatypes with RELAX NG}.
309 The XSD type library
310 is named @code{:|http://www.w3.org/2001/XMLSchema-datatypes|}."))
312 (defmethod initialize-instance ((instance xsd-type)
313 &rest args
314 &key ((:|minLength| min-length))
315 ((:|maxLength| max-length))
316 ((:|length| exact-length)))
317 (apply #'call-next-method
318 instance
319 :min-length (when min-length
320 ;; fixme: richtigen fehler
321 (parse-integer min-length))
322 :max-length (when max-length
323 ;; fixme: richtigen fehler
324 (parse-integer max-length))
325 :exact-length (when exact-length
326 ;; fixme: richtigen fehler
327 (parse-integer exact-length))
328 args))
330 (defmethod type-library ((type xsd-type))
331 :|http://www.w3.org/2001/XMLSchema-datatypes|)
333 (defmethod find-type
334 ((library (eql :|http://www.w3.org/2001/XMLSchema-datatypes|))
335 name
336 &rest args &key)
337 args ;fixme
338 (if (eq name :probe)
340 (let ((class
341 (case (find-symbol name :keyword)
342 (:|QName| 'qname-type)
343 (:|NCName| 'ncname-type)
344 (:|anyURI| 'any-uri-type)
345 (:|string| 'xsd-string-type)
346 (t nil))))
347 (if class
348 (apply #'make-instance class args)
349 nil))))
351 (defgeneric %parse (type e context))
353 (defmethod validp ((type xsd-type) e &optional context)
354 (not (eq :error (%parse type e context))))
356 (defmethod parse ((type xsd-type) e &optional context)
357 (let ((result (%parse type e context)))
358 (when (eq result :error)
359 (error "not valid for data type ~A: ~S" type e))
360 result))
363 ;;; QName
365 (defclass qname-type (xsd-type) ())
367 (defmethod type-name ((type qname-type))
368 "QName")
370 (defstruct (qname (:constructor make-qname (uri lname)))
372 lname)
374 (defmethod equal-using-type ((type qname-type) u v)
375 (and (equal (qname-uri u) (qname-uri v))
376 (equal (qname-lname u) (qname-lname v))))
378 (defun namep (str)
379 (and (not (zerop (length str)))
380 (cxml::name-start-rune-p (elt str 0))
381 (every #'cxml::name-rune-p str)))
383 (defmethod %parse ((type qname-type) e context)
384 (setf e (normalize-whitespace e))
385 (handler-case
386 (if (namep e)
387 (multiple-value-bind (prefix local-name) (cxml::split-qname e)
388 (let ((uri (when prefix
389 (context-find-namespace-binding context prefix))))
390 (if (and prefix (not uri))
391 :error
392 (make-qname uri local-name))))
393 :error)
394 (cxml:well-formedness-violation ()
395 :error)))
398 ;;; NCName
400 (defclass ncname-type (xsd-type) ())
402 (defmethod type-name ((type ncname-type))
403 "NCName")
405 (defmethod equal-using-type ((type ncname-type) u v)
406 (equal u v))
408 (defun nc-name-p (str)
409 (and (namep str) (cxml::nc-name-p str)))
411 (defmethod %parse ((type ncname-type) e context)
412 (setf e (normalize-whitespace e))
413 (if (nc-name-p e)
415 :error))
418 ;;; AnyURi
420 (defclass any-uri-type (xsd-type) ())
422 (defmethod type-name ((type any-uri-type))
423 "AnyURI")
425 (defmethod equal-using-type ((type any-uri-type) u v)
426 (equal u v))
428 (defmethod %parse ((type any-uri-type) e context)
429 (cxml-rng::escape-uri (normalize-whitespace e)))
432 ;;; string
434 (defclass xsd-string-type (xsd-type) ())
436 (defmethod type-name ((type xsd-string-type))
437 "string")
439 (defmethod equal-using-type ((type xsd-string-type) u v)
440 (equal u v))
442 (defmethod %parse ((type xsd-string-type) e context)
443 (if (or (and (min-length type) (< (length e) (min-length type)))
444 (and (max-length type) (> (length e) (max-length type)))
445 (and (exact-length type) (/= (length e) (exact-length type))))
446 :error