1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 (in-package :cxml-types
)
31 (defclass data-type
() ()
33 "@short{The abstract superclass of all types.}
35 Each type belongs to a datatype library, named by a keyword. In each
36 library, the types are named by strings.
38 @see-constructor{find-type}
40 @see-slot{type-library}
41 @see-slot{type-context-dependent-p}
43 @see{equal-using-type}
46 (defgeneric find-type
(library name
&key
&allow-other-keys
)
48 "@arg[library]{datatype library, a keyword symbol}
49 @arg[name]{the type's name, a string}
50 @arg[args]{type parameters, strings named by keyword arguments}
51 @return{an instance of @class{data-type}, or @code{nil}}
52 @short{Look up the type named @em{name} in datatype library @em{library}.}
54 Return a type instance for this type and the additional parameters,
55 or @code{nil} if the type does not exist.
57 Additional parameters (knows as facets in XSD) can be passed to specify
58 or restrict the type for the purposes of @fun{validp}.
62 (defgeneric type-library
(type)
64 "@arg[type]{an instance of @class{data-type}}
65 @return{library name, a keyword}
66 @short{Return the name of the library this type belongs to.}
69 @see{type-context-dependent-p}"))
71 (defgeneric type-name
(type)
73 "@arg[type]{an instance of @class{data-type}}
74 @return{type name, a string}
75 @short{Return the name this type has within its library.}
78 @see{type-context-dependent-p}"))
80 (defmethod find-type ((library t
) name
&key
&allow-other-keys
)
81 (declare (ignore name
))
84 (defgeneric type-context-dependent-p
(type)
86 "@arg[type]{an instance of @class{data-type}}
88 @short{Return true if parsing and validation of values by this type
89 depends on the validation context.}
91 In this case, the optional @code{context} argument to @fun{parse} and
92 @fun{validp} is required, and an error will be signalled if it is missing.
94 @see{validation-context}
97 @see{type-context-dependent-p}"))
99 (defmethod type-context-dependent-p ((type data-type
))
102 (defgeneric equal-using-type
(type u v
)
104 "@arg[type]{an instance of @class{data-type}}
105 @arg[u]{a parsed value as returned by @fun{parse}}
106 @arg[v]{a parsed value as returned by @fun{parse}}
108 @short{Compare the @emph{values} @code{u} and @code{v} using a
109 data-type-dependent equality function.}
113 (defgeneric parse
(type e
&optional context
)
115 "@arg[type]{an instance of @class{data-type}}
117 @arg[context]{an instance of @class{validation-context}}
119 @short{Parse string @code{e} and return a representation of its value
120 as defined by the data type.}
122 The @code{context} argument is required if @fun{type-context-dependent-p}
123 is true for @code{type}, and will be ignored otherwise.
125 @see{equal-using-type}
128 (defgeneric validp
(type e
&optional context
)
130 "@arg[type]{an instance of @class{data-type}}
132 @arg[context]{an instance of @class{validation-context}}
134 @short{Determine whether a string is a valid lexical representation
137 The @code{context} argument is required if @fun{type-context-dependent-p}
138 is true for @code{type}, and will be ignored otherwise.
141 @see{equal-using-type}"))
144 ;;; Validation context
146 (defclass validation-context
() ()
148 "@short{This abstract class defines a protocol allowing data types
149 to query the XML parser about its current state.}
151 Some types are context dependent, as indicated by
152 @fun{type-context-dependent-p}. Those types need access to state
153 computed by the XML parser implicitly, like namespace bindings or
156 User-defined subclasses must implement a method
157 for the @fun{context-find-namespace-binding} function.
159 Two pre-defined validation context implementations are
160 provided, one for use with SAX, the other based on Klacks."))
162 (defgeneric context-find-namespace-binding
(context prefix
)
164 "@arg[context]{an instance of @class{validation-context}}
165 @arg[prefix]{name prefix, a string}
166 @return{the namespace URI as a string, or NIL}
167 @short{This function resolves a namespace prefix to a namespace URI in the
169 All currently declared namespaces
170 are taken into account, including those declared directly on the
173 (defclass klacks-validation-context
(validation-context)
174 ((source :initarg
:source
:accessor context-source
))
176 "A validation-context implementation that queries
177 a klacks source for information about the parser's current state.
178 @see-constructor{make-klacks-validation-context}"))
180 (defun make-klacks-validation-context (source)
181 "@arg[source]{a @a[http://common-lisp.net/project/cxml/klacks.html]{
183 @return{a @class{klacks-validation-context}}
184 Create a validation-context that will query the given klacks source for
185 the current parser context."
186 (make-instance 'klacks-validation-context
:source source
))
188 (defmethod context-find-namespace-binding
189 ((context klacks-validation-context
) prefix
)
190 (klacks:find-namespace-binding prefix
(context-source context
)))
192 (defclass sax-validation-context-mixin
(validation-context)
193 ((stack :initform nil
:accessor context-stack
))
195 "@short{A class that implements validation-context as a mixin for
196 user-defined SAX handler classes.}
198 The mixin will record namespace information
199 automatically, and the user's SAX handler can simply be passed as a
200 validation context to data type functions."))
202 (defmethod sax:start-prefix-mapping
203 ((handler sax-validation-context-mixin
) prefix uri
)
204 (push (cons prefix uri
) (context-stack handler
)))
206 (defmethod sax:end-prefix-mapping
207 ((handler sax-validation-context-mixin
) prefix
)
208 (setf (context-stack handler
)
210 (context-stack handler
)
215 (defmethod context-find-namespace-binding
216 ((context sax-validation-context-mixin
) prefix
)
217 (cdr (assoc prefix
(context-stack context
) :test
#'equal
)))
220 ;;; Relax NG built-in type library
222 (defclass rng-type
(data-type) ()
224 "@short{The class of Relax NG built-in types.}
225 Relax NG defines two built-in data type: string and token.
227 The Relax NG type library is named @code{:||}."))
229 (defclass string-type
(rng-type) ()
231 "@short{The Relax NG 'string' type.}
232 This data type allows arbitrary strings and interprets them as-is.
234 For this type, @fun{parse} will return any string unchanged, and
235 @fun{equal-using-type} compares strings using @code{equal}."))
237 (defclass token-type
(rng-type) ()
239 "@short{The Relax NG 'token' type.}
240 This data type allows arbitrary strings and normalizes all whitespaces.
242 For this type, @fun{parse} will return the string with leading and
243 trailing whitespace removed, and remaining sequences of spaces
244 compressed down to one space character each.
246 A method for @fun{equal-using-type} compares strings using @code{equal}."))
248 (defmethod type-library ((type rng-type
))
251 (defvar *string-data-type
* (make-instance 'string-type
))
252 (defvar *token-data-type
* (make-instance 'token-type
))
254 (defmethod find-type ((library (eql :||
)) name
&rest args
&key
)
258 ((equal name
"string") *string-data-type
*)
259 ((equal name
"token") *token-data-type
*)
262 (defmethod equal-using-type ((type rng-type
) u v
)
265 (defmethod validp ((type rng-type
) e
&optional context
)
266 (declare (ignore e context
))
269 (defmethod type-name ((type string-type
)) "string")
270 (defmethod type-name ((type token-type
)) "token")
272 (defmethod parse ((type string-type
) e
&optional context
)
273 (declare (ignore context
))
276 (defmethod parse ((type token-type
) e
&optional context
)
277 (declare (ignore context
))
278 (normalize-whitespace e
))
280 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
281 (defparameter *whitespace
*
282 (format nil
"~C~C~C~C"
288 (defun normalize-whitespace (str)
289 (cl-ppcre:regex-replace-all
#.
(format nil
"[~A]+" *whitespace
*)
290 (string-trim *whitespace
* str
)
294 ;;; XML Schema Part 2: Datatypes Second Edition
296 (defparameter *xsd-types
* (make-hash-table :test
'equal
))
299 ((class-name type-name
) (&rest supers
) (&rest slots
) &rest args
)
301 (setf (gethash ,type-name
*xsd-types
*) ',class-name
)
302 (defclass ,class-name
,supers
303 ((type-name :initform
,type-name
309 (defclass xsd-type
(data-type)
310 ((min-length :initarg
:min-length
:accessor min-length
)
311 (max-length :initarg
:max-length
:accessor max-length
)
312 (exact-length :initarg
:exact-length
:accessor exact-length
))
314 "@short{The class of XML Schema built-in types.}
316 Subclasses of xsd-type provide the built-in types of
317 @a[http://www.w3.org/TR/xmlschema-2/]{
318 XML Schema Part 2: Datatypes Second Edition}
319 as specified in @a[http://relaxng.org/xsd-20010907.html]{Guidelines for
320 using W3C XML Schema Datatypes with RELAX NG}.
323 is named @code{:|http://www.w3.org/2001/XMLSchema-datatypes|}."))
325 (defmethod initialize-instance ((instance xsd-type
)
327 &key
((:|minLength| min-length
))
328 ((:|maxLength| max-length
))
329 ((:|length| exact-length
)))
330 (apply #'call-next-method
332 :min-length
(when min-length
333 ;; fixme: richtigen fehler
334 (parse-integer min-length
))
335 :max-length
(when max-length
336 ;; fixme: richtigen fehler
337 (parse-integer max-length
))
338 :exact-length
(when exact-length
339 ;; fixme: richtigen fehler
340 (parse-integer exact-length
))
343 (defmethod type-library ((type xsd-type
))
344 :|http
://www.w3.org
/2001/XMLSchema-datatypes|
)
347 ((library (eql :|http
://www.w3.org
/2001/XMLSchema-datatypes|
))
353 (let ((class (gethash name
*xsd-types
*)))
355 (apply #'make-instance class args
)
358 (defgeneric %parse
(type e context
))
360 (defmethod validp ((type xsd-type
) e
&optional context
)
361 (not (eq :error
(%parse type e context
))))
363 (defmethod parse ((type xsd-type
) e
&optional context
)
364 (let ((result (%parse type e context
)))
365 (when (eq result
:error
)
366 (error "not valid for data type ~A: ~S" type e
))
372 (defxsd (duration-type "duration") (xsd-type) ())
374 (defmethod equal-using-type ((type duration-type
) u v
)
377 (defmethod %parse
((type duration-type
) e context
)
378 (declare (ignore context
))
380 (cl-ppcre:scan-to-strings
"(?x)
382 P(?:(\\d+)Y)? # years
383 (?:(\\d+)M)? # months
387 (?:(\\d+)M)? # minutes
388 (?:(\\d+(?:[.]\\d+)?)S)? # seconds
391 (destructuring-bind (&optional minusp y m d tp h min s
)
393 (if (and (or y m d h min s
)
394 (or (null tp
) (or h min s
)))
395 (let ((f (if minusp -
1 1)))
397 (and str
(* f
(parse-integer str
)))))
398 (list (int y
) (int m
) (int d
) (int h
) (int min
)
399 (and s
(* f
(parse-number:parse-number s
))))))
405 (defxsd (date-time-type "dateTime") (xsd-type) ())
407 (defmethod equal-using-type ((type duration-type
) u v
)
410 ;; FIXME: Was ist denn nun mit der Zeitzone? Sollen wir die wegwerfen oder
411 ;; hat das was mit timeOnTimeline zu tun? Verstehe ich nicht.
412 (defmethod %parse
((type date-time-type
) e context
)
413 (declare (ignore context
))
415 (cl-ppcre:scan-to-strings
"(?x)
417 ((?:[1-9]\d*)?\d{4}) # year
423 -(\d+(?:[.]\\d+)?) # second
424 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
427 (destructuring-bind (&optional minusp y m d h min s tz tz-sign tz-h tz-m
)
429 ;; parse into numbers
431 (and str
(parse-integer str
)))
433 (and str
(parse-number:parse-number str
))))
434 (setf (values y m d h min s tz-h tz-m
)
435 (values (* (int y
) (if minusp -
1 1))
436 (int m
) (int d
) (int h
) (int min
)
438 (int tz-h
) (int tz-m
))))
441 ((and (eql m
2) (zerop (mod y
4)) (not (zerop (mod y
400)))) 29)
452 ;; zzz sind leap seconds immer erlaubt?
454 (list (* y
(if minusp -
1 1)) m d h min s
)
460 (defxsd (time-type "time") (xsd-type) ())
466 (defxsd (date-type "date") (xsd-type) ())
472 (defxsd (year-month-type "gYearMonth") (xsd-type) ())
478 (defxsd (year-type "gYear") (xsd-type) ())
484 (defxsd (month-day-type "gMonthDay") (xsd-type) ())
490 (defxsd (day-type "gDay") (xsd-type) ())
496 (defxsd (month-type "gMonth") (xsd-type) ())
502 (defxsd (boolean-type "boolean") (xsd-type) ())
507 (defxsd (base64-binary-type "base64Binary") (xsd-type) ())
512 (defxsd (hex-binary-type "hexBinary") (xsd-type) ())
517 (defxsd (float-type "float") (xsd-type) ())
522 (defxsd (decimal-type "decimal") (xsd-type) ())
527 (defxsd (double-type "double") (xsd-type) ())
532 (defxsd (any-uri-type "AnyURI") (xsd-type) ())
534 (defmethod equal-using-type ((type any-uri-type
) u v
)
537 (defmethod %parse
((type any-uri-type
) e context
)
538 (cxml-rng::escape-uri
(normalize-whitespace e
)))
543 (defxsd (qname-type "QName") (xsd-type) ())
545 (defstruct (qname (:constructor make-qname
(uri lname
)))
549 (defmethod equal-using-type ((type qname-type
) u v
)
550 (and (equal (qname-uri u
) (qname-uri v
))
551 (equal (qname-lname u
) (qname-lname v
))))
554 (and (not (zerop (length str
)))
555 (cxml::name-start-rune-p
(elt str
0))
556 (every #'cxml
::name-rune-p str
)))
558 (defmethod %parse
((type qname-type
) e context
)
559 (setf e
(normalize-whitespace e
))
562 (multiple-value-bind (prefix local-name
) (cxml::split-qname e
)
563 (let ((uri (when prefix
564 (context-find-namespace-binding context prefix
))))
565 (if (and prefix
(not uri
))
567 (make-qname uri local-name
))))
569 (cxml:well-formedness-violation
()
575 (defxsd (notation-type "NOTATION") (xsd-type) ())
582 (defxsd (xsd-string-type "string") (xsd-type) ())
584 (defmethod equal-using-type ((type xsd-string-type
) u v
)
587 (defmethod %parse
((type xsd-string-type
) e context
)
588 (if (or (and (min-length type
) (< (length e
) (min-length type
)))
589 (and (max-length type
) (> (length e
) (max-length type
)))
590 (and (exact-length type
) (/= (length e
) (exact-length type
))))
601 (defxsd (normalized-string-type "normalizedString") (xsd-string-type) ())
607 (defxsd (xsd-token-type "token") (normalized-string-type) ())
612 (defxsd (language-type "language") (xsd-token-type) ())
617 (defxsd (name-type "Name") (xsd-token-type) ())
622 (defxsd (ncname-type "NCName") (name-type) ())
624 (defmethod equal-using-type ((type ncname-type
) u v
)
627 (defun nc-name-p (str)
628 (and (namep str
) (cxml::nc-name-p str
)))
630 (defmethod %parse
((type ncname-type
) e context
)
631 (setf e
(normalize-whitespace e
))
638 (defxsd (id-type "ID") (ncname-type) ())
643 (defxsd (idref-type "IDREF") (id-type) ())
649 (defxsd (idrefs-type "IDREFS") (xsd-type) ())
654 (defxsd (entity-type "ENTITY") (ncname-type) ())
660 (defxsd (entities-type "ENTITIES") (xsd-type) ())
665 (defxsd (nmtoken-type "NMTOKEN") (xsd-token-type) ())
670 (defxsd (nmtokens-type "NMTOKENS") (nmtoken-type) ())
675 (defxsd (integer-type "integer") (decimal-type) ())
678 ;;; nonPositiveInteger
680 (defxsd (non-positive-integer-type "nonPositiveInteger") (integer-type) ())
683 ;;; nonPositiveInteger
685 (defxsd (negative-integer-type "negativeInteger") (non-positive-integer-type)
691 (defxsd (long-type "long") (integer-type) ())
696 (defxsd (int-type "int") (long-type) ())
701 (defxsd (short-type "short") (int-type) ())
706 (defxsd (bite-type "byte") (short-type) ())
709 ;;; nonNegativeInteger
711 (defxsd (non-negative-integer-type "nonNegativeInteger") (integer-type) ())
716 (defxsd (unsigned-long-type "unsignedLong") (non-negative-integer-type) ())
721 (defxsd (unsigned-int-type "unsignedInt") (unsigned-long-type) ())
726 (defxsd (unsigned-short-type "unsignedShort") (unsigned-int-type) ())
731 (defxsd (unsigned-byte-type "unsignedByte") (unsigned-short-type) ())
736 (defxsd (positive-integer-type "positiveInteger") (non-negative-integer-type)