date-time-type
[cxml-rng.git] / types.lisp
blob15206ee9b541f8987afa0f933f1f593d7a40e4bd
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
2 ;;;
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8 ;;;
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11 ;;;
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 (in-package :cxml-types)
31 (defclass data-type () ()
32 (:documentation
33 "@short{The abstract superclass of all types.}
35 Each type belongs to a datatype library, named by a keyword. In each
36 library, the types are named by strings.
38 @see-constructor{find-type}
39 @see-slot{type-name}
40 @see-slot{type-library}
41 @see-slot{type-context-dependent-p}
42 @see{parse}
43 @see{equal-using-type}
44 @see{validp}"))
46 (defgeneric find-type (library name &key &allow-other-keys)
47 (:documentation
48 "@arg[library]{datatype library, a keyword symbol}
49 @arg[name]{the type's name, a string}
50 @arg[args]{type parameters, strings named by keyword arguments}
51 @return{an instance of @class{data-type}, or @code{nil}}
52 @short{Look up the type named @em{name} in datatype library @em{library}.}
54 Return a type instance for this type and the additional parameters,
55 or @code{nil} if the type does not exist.
57 Additional parameters (knows as facets in XSD) can be passed to specify
58 or restrict the type for the purposes of @fun{validp}.
60 @see{data-type}"))
62 (defgeneric type-library (type)
63 (:documentation
64 "@arg[type]{an instance of @class{data-type}}
65 @return{library name, a keyword}
66 @short{Return the name of the library this type belongs to.}
68 @see{type-name}
69 @see{type-context-dependent-p}"))
71 (defgeneric type-name (type)
72 (:documentation
73 "@arg[type]{an instance of @class{data-type}}
74 @return{type name, a string}
75 @short{Return the name this type has within its library.}
77 @see{type-library}
78 @see{type-context-dependent-p}"))
80 (defmethod find-type ((library t) name &key &allow-other-keys)
81 (declare (ignore name))
82 nil)
84 (defgeneric type-context-dependent-p (type)
85 (:documentation
86 "@arg[type]{an instance of @class{data-type}}
87 @return{a boolean}
88 @short{Return true if parsing and validation of values by this type
89 depends on the validation context.}
91 In this case, the optional @code{context} argument to @fun{parse} and
92 @fun{validp} is required, and an error will be signalled if it is missing.
94 @see{validation-context}
95 @see{type-name}
96 @see{type-library}
97 @see{type-context-dependent-p}"))
99 (defmethod type-context-dependent-p ((type data-type))
100 nil)
102 (defgeneric equal-using-type (type u v)
103 (:documentation
104 "@arg[type]{an instance of @class{data-type}}
105 @arg[u]{a parsed value as returned by @fun{parse}}
106 @arg[v]{a parsed value as returned by @fun{parse}}
107 @return{a boolean}
108 @short{Compare the @emph{values} @code{u} and @code{v} using a
109 data-type-dependent equality function.}
111 @see{validp}"))
113 (defgeneric parse (type e &optional context)
114 (:documentation
115 "@arg[type]{an instance of @class{data-type}}
116 @arg[e]{a string}
117 @arg[context]{an instance of @class{validation-context}}
118 @return{an object}
119 @short{Parse string @code{e} and return a representation of its value
120 as defined by the data type.}
122 The @code{context} argument is required if @fun{type-context-dependent-p}
123 is true for @code{type}, and will be ignored otherwise.
125 @see{equal-using-type}
126 @see{validp}"))
128 (defgeneric validp (type e &optional context)
129 (:documentation
130 "@arg[type]{an instance of @class{data-type}}
131 @arg[e]{a string}
132 @arg[context]{an instance of @class{validation-context}}
133 @return{a boolean}
134 @short{Determine whether a string is a valid lexical representation
135 for a type.}
137 The @code{context} argument is required if @fun{type-context-dependent-p}
138 is true for @code{type}, and will be ignored otherwise.
140 @see{parse}
141 @see{equal-using-type}"))
144 ;;; Validation context
146 (defclass validation-context () ()
147 (:documentation
148 "@short{This abstract class defines a protocol allowing data types
149 to query the XML parser about its current state.}
151 Some types are context dependent, as indicated by
152 @fun{type-context-dependent-p}. Those types need access to state
153 computed by the XML parser implicitly, like namespace bindings or
154 the Base URI.
156 User-defined subclasses must implement a method
157 for the @fun{context-find-namespace-binding} function.
159 Two pre-defined validation context implementations are
160 provided, one for use with SAX, the other based on Klacks."))
162 (defgeneric context-find-namespace-binding (context prefix)
163 (:documentation
164 "@arg[context]{an instance of @class{validation-context}}
165 @arg[prefix]{name prefix, a string}
166 @return{the namespace URI as a string, or NIL}
167 @short{This function resolves a namespace prefix to a namespace URI in the
168 current context.}
169 All currently declared namespaces
170 are taken into account, including those declared directly on the
171 current element."))
173 (defclass klacks-validation-context (validation-context)
174 ((source :initarg :source :accessor context-source))
175 (:documentation
176 "A validation-context implementation that queries
177 a klacks source for information about the parser's current state.
178 @see-constructor{make-klacks-validation-context}"))
180 (defun make-klacks-validation-context (source)
181 "@arg[source]{a @a[http://common-lisp.net/project/cxml/klacks.html]{
182 klacks source}}
183 @return{a @class{klacks-validation-context}}
184 Create a validation-context that will query the given klacks source for
185 the current parser context."
186 (make-instance 'klacks-validation-context :source source))
188 (defmethod context-find-namespace-binding
189 ((context klacks-validation-context) prefix)
190 (klacks:find-namespace-binding prefix (context-source context)))
192 (defclass sax-validation-context-mixin (validation-context)
193 ((stack :initform nil :accessor context-stack))
194 (:documentation
195 "@short{A class that implements validation-context as a mixin for
196 user-defined SAX handler classes.}
198 The mixin will record namespace information
199 automatically, and the user's SAX handler can simply be passed as a
200 validation context to data type functions."))
202 (defmethod sax:start-prefix-mapping
203 ((handler sax-validation-context-mixin) prefix uri)
204 (push (cons prefix uri) (context-stack handler)))
206 (defmethod sax:end-prefix-mapping
207 ((handler sax-validation-context-mixin) prefix)
208 (setf (context-stack handler)
209 (remove prefix
210 (context-stack handler)
211 :count 1
212 :key #'car
213 :test #'equal)))
215 (defmethod context-find-namespace-binding
216 ((context sax-validation-context-mixin) prefix)
217 (cdr (assoc prefix (context-stack context) :test #'equal)))
220 ;;; Relax NG built-in type library
222 (defclass rng-type (data-type) ()
223 (:documentation
224 "@short{The class of Relax NG built-in types.}
225 Relax NG defines two built-in data type: string and token.
227 The Relax NG type library is named @code{:||}."))
229 (defclass string-type (rng-type) ()
230 (:documentation
231 "@short{The Relax NG 'string' type.}
232 This data type allows arbitrary strings and interprets them as-is.
234 For this type, @fun{parse} will return any string unchanged, and
235 @fun{equal-using-type} compares strings using @code{equal}."))
237 (defclass token-type (rng-type) ()
238 (:documentation
239 "@short{The Relax NG 'token' type.}
240 This data type allows arbitrary strings and normalizes all whitespaces.
242 For this type, @fun{parse} will return the string with leading and
243 trailing whitespace removed, and remaining sequences of spaces
244 compressed down to one space character each.
246 A method for @fun{equal-using-type} compares strings using @code{equal}."))
248 (defmethod type-library ((type rng-type))
249 :||)
251 (defvar *string-data-type* (make-instance 'string-type))
252 (defvar *token-data-type* (make-instance 'token-type))
254 (defmethod find-type ((library (eql :||)) name &rest args &key)
255 (cond
256 ((eq name :probe) t)
257 (args nil)
258 ((equal name "string") *string-data-type*)
259 ((equal name "token") *token-data-type*)
260 (t nil)))
262 (defmethod equal-using-type ((type rng-type) u v)
263 (equal u v))
265 (defmethod validp ((type rng-type) e &optional context)
266 (declare (ignore e context))
269 (defmethod type-name ((type string-type)) "string")
270 (defmethod type-name ((type token-type)) "token")
272 (defmethod parse ((type string-type) e &optional context)
273 (declare (ignore context))
276 (defmethod parse ((type token-type) e &optional context)
277 (declare (ignore context))
278 (normalize-whitespace e))
280 (eval-when (:compile-toplevel :load-toplevel :execute)
281 (defparameter *whitespace*
282 (format nil "~C~C~C~C"
283 (code-char 9)
284 (code-char 32)
285 (code-char 13)
286 (code-char 10))))
288 (defun normalize-whitespace (str)
289 (cl-ppcre:regex-replace-all #.(format nil "[~A]+" *whitespace*)
290 (string-trim *whitespace* str)
291 " "))
294 ;;; XML Schema Part 2: Datatypes Second Edition
296 (defparameter *xsd-types* (make-hash-table :test 'equal))
298 (defmacro defxsd
299 ((class-name type-name) (&rest supers) (&rest slots) &rest args)
300 `(progn
301 (setf (gethash ,type-name *xsd-types*) ',class-name)
302 (defclass ,class-name ,supers
303 ((type-name :initform ,type-name
304 :reader type-name
305 :allocation :class)
306 ,@slots)
307 ,@args)))
309 (defclass xsd-type (data-type)
310 ((min-length :initarg :min-length :accessor min-length)
311 (max-length :initarg :max-length :accessor max-length)
312 (exact-length :initarg :exact-length :accessor exact-length))
313 (:documentation
314 "@short{The class of XML Schema built-in types.}
316 Subclasses of xsd-type provide the built-in types of
317 @a[http://www.w3.org/TR/xmlschema-2/]{
318 XML Schema Part 2: Datatypes Second Edition}
319 as specified in @a[http://relaxng.org/xsd-20010907.html]{Guidelines for
320 using W3C XML Schema Datatypes with RELAX NG}.
322 The XSD type library
323 is named @code{:|http://www.w3.org/2001/XMLSchema-datatypes|}."))
325 (defmethod initialize-instance ((instance xsd-type)
326 &rest args
327 &key ((:|minLength| min-length))
328 ((:|maxLength| max-length))
329 ((:|length| exact-length)))
330 (apply #'call-next-method
331 instance
332 :min-length (when min-length
333 ;; fixme: richtigen fehler
334 (parse-integer min-length))
335 :max-length (when max-length
336 ;; fixme: richtigen fehler
337 (parse-integer max-length))
338 :exact-length (when exact-length
339 ;; fixme: richtigen fehler
340 (parse-integer exact-length))
341 args))
343 (defmethod type-library ((type xsd-type))
344 :|http://www.w3.org/2001/XMLSchema-datatypes|)
346 (defmethod find-type
347 ((library (eql :|http://www.w3.org/2001/XMLSchema-datatypes|))
348 name
349 &rest args &key)
350 args ;fixme
351 (if (eq name :probe)
353 (let ((class (gethash name *xsd-types*)))
354 (if class
355 (apply #'make-instance class args)
356 nil))))
358 (defgeneric %parse (type e context))
360 (defmethod validp ((type xsd-type) e &optional context)
361 (not (eq :error (%parse type e context))))
363 (defmethod parse ((type xsd-type) e &optional context)
364 (let ((result (%parse type e context)))
365 (when (eq result :error)
366 (error "not valid for data type ~A: ~S" type e))
367 result))
370 ;;; duration
372 (defxsd (duration-type "duration") (xsd-type) ())
374 (defmethod equal-using-type ((type duration-type) u v)
375 (equal u v))
377 (defmethod %parse ((type duration-type) e context)
378 (declare (ignore context))
379 (let ((strs
380 (cl-ppcre:scan-to-strings "(?x)
381 ^(-)? # minus
382 P(?:(\\d+)Y)? # years
383 (?:(\\d+)M)? # months
384 (?:(\\d+)D)? # days
385 (T # (time)
386 (?:(\\d+)H)? # hours
387 (?:(\\d+)M)? # minutes
388 (?:(\\d+(?:[.]\\d+)?)S)? # seconds
389 )?$"
390 e)))
391 (destructuring-bind (&optional minusp y m d tp h min s)
392 (coerce strs 'list)
393 (if (and (or y m d h min s)
394 (or (null tp) (or h min s)))
395 (let ((f (if minusp -1 1)))
396 (flet ((int (str)
397 (and str (* f (parse-integer str)))))
398 (list (int y) (int m) (int d) (int h) (int min)
399 (and s (* f (parse-number:parse-number s))))))
400 :error))))
403 ;;; dateTime
405 (defxsd (date-time-type "dateTime") (xsd-type) ())
407 (defmethod equal-using-type ((type duration-type) u v)
408 (equal u v))
410 ;; FIXME: Was ist denn nun mit der Zeitzone? Sollen wir die wegwerfen oder
411 ;; hat das was mit timeOnTimeline zu tun? Verstehe ich nicht.
412 (defmethod %parse ((type date-time-type) e context)
413 (declare (ignore context))
414 (let ((strs
415 (cl-ppcre:scan-to-strings "(?x)
416 ^(-)? # opt. minus
417 ((?:[1-9]\d*)?\d{4}) # year
418 -(\d\d) # month
419 -(\d\d) # day
420 T # (time)
421 (\d\d) # hour
422 -(\d\d) # minute
423 -(\d+(?:[.]\\d+)?) # second
424 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
426 e)))
427 (destructuring-bind (&optional minusp y m d h min s tz tz-sign tz-h tz-m)
428 (coerce strs 'list)
429 ;; parse into numbers
430 (flet ((int (str)
431 (and str (parse-integer str)))
432 (num (str)
433 (and str (parse-number:parse-number str))))
434 (setf (values y m d h min s tz-h tz-m)
435 (values (* (int y) (if minusp -1 1))
436 (int m) (int d) (int h) (int min)
437 (num s)
438 (int tz-h) (int tz-m))))
439 (let ((day-limit
440 (cond
441 ((and (eql m 2) (zerop (mod y 4)) (not (zerop (mod y 400)))) 29)
442 ((eql m 2) 28)
443 ((oddp y) 31)
444 (t 30))))
445 ;; check ranges
446 (if (and y
447 (plusp z)
448 (<= 1 m 12)
449 (<= 1 d day-limit)
450 (<= 0 h 24)
451 (<= 0 m 59)
452 ;; zzz sind leap seconds immer erlaubt?
453 (<= 0 s 60))
454 (list (* y (if minusp -1 1)) m d h min s)
455 :error)))))
458 ;;; time
460 (defxsd (time-type "time") (xsd-type) ())
464 ;;; date
466 (defxsd (date-type "date") (xsd-type) ())
470 ;;; gYearMonth
472 (defxsd (year-month-type "gYearMonth") (xsd-type) ())
476 ;;; gYear
478 (defxsd (year-type "gYear") (xsd-type) ())
482 ;;; gMonthDay
484 (defxsd (month-day-type "gMonthDay") (xsd-type) ())
488 ;;; gDay
490 (defxsd (day-type "gDay") (xsd-type) ())
494 ;;; gMonth
496 (defxsd (month-type "gMonth") (xsd-type) ())
500 ;;; boolean
502 (defxsd (boolean-type "boolean") (xsd-type) ())
505 ;;; base64Binary
507 (defxsd (base64-binary-type "base64Binary") (xsd-type) ())
510 ;;; hexBinary
512 (defxsd (hex-binary-type "hexBinary") (xsd-type) ())
515 ;;; float
517 (defxsd (float-type "float") (xsd-type) ())
520 ;;; decimal
522 (defxsd (decimal-type "decimal") (xsd-type) ())
525 ;;; double
527 (defxsd (double-type "double") (xsd-type) ())
530 ;;; AnyURi
532 (defxsd (any-uri-type "AnyURI") (xsd-type) ())
534 (defmethod equal-using-type ((type any-uri-type) u v)
535 (equal u v))
537 (defmethod %parse ((type any-uri-type) e context)
538 (cxml-rng::escape-uri (normalize-whitespace e)))
541 ;;; QName
543 (defxsd (qname-type "QName") (xsd-type) ())
545 (defstruct (qname (:constructor make-qname (uri lname)))
547 lname)
549 (defmethod equal-using-type ((type qname-type) u v)
550 (and (equal (qname-uri u) (qname-uri v))
551 (equal (qname-lname u) (qname-lname v))))
553 (defun namep (str)
554 (and (not (zerop (length str)))
555 (cxml::name-start-rune-p (elt str 0))
556 (every #'cxml::name-rune-p str)))
558 (defmethod %parse ((type qname-type) e context)
559 (setf e (normalize-whitespace e))
560 (handler-case
561 (if (namep e)
562 (multiple-value-bind (prefix local-name) (cxml::split-qname e)
563 (let ((uri (when prefix
564 (context-find-namespace-binding context prefix))))
565 (if (and prefix (not uri))
566 :error
567 (make-qname uri local-name))))
568 :error)
569 (cxml:well-formedness-violation ()
570 :error)))
573 ;;; NOTATION
575 (defxsd (notation-type "NOTATION") (xsd-type) ())
580 ;;; string
582 (defxsd (xsd-string-type "string") (xsd-type) ())
584 (defmethod equal-using-type ((type xsd-string-type) u v)
585 (equal u v))
587 (defmethod %parse ((type xsd-string-type) e context)
588 (if (or (and (min-length type) (< (length e) (min-length type)))
589 (and (max-length type) (> (length e) (max-length type)))
590 (and (exact-length type) (/= (length e) (exact-length type))))
591 :error
595 ;;;;
596 ;;;; Derived types
597 ;;;;
599 ;;; normalizedString
601 (defxsd (normalized-string-type "normalizedString") (xsd-string-type) ())
605 ;;; token
607 (defxsd (xsd-token-type "token") (normalized-string-type) ())
610 ;;; language
612 (defxsd (language-type "language") (xsd-token-type) ())
615 ;;; Name
617 (defxsd (name-type "Name") (xsd-token-type) ())
620 ;;; NCName
622 (defxsd (ncname-type "NCName") (name-type) ())
624 (defmethod equal-using-type ((type ncname-type) u v)
625 (equal u v))
627 (defun nc-name-p (str)
628 (and (namep str) (cxml::nc-name-p str)))
630 (defmethod %parse ((type ncname-type) e context)
631 (setf e (normalize-whitespace e))
632 (if (nc-name-p e)
634 :error))
636 ;;; ID
638 (defxsd (id-type "ID") (ncname-type) ())
641 ;;; IDREF
643 (defxsd (idref-type "IDREF") (id-type) ())
646 ;;; IDREFS
648 ;; fixme?
649 (defxsd (idrefs-type "IDREFS") (xsd-type) ())
652 ;;; ENTITY
654 (defxsd (entity-type "ENTITY") (ncname-type) ())
657 ;;; IDREFS
659 ;; fixme?
660 (defxsd (entities-type "ENTITIES") (xsd-type) ())
663 ;;; NMTOKEN
665 (defxsd (nmtoken-type "NMTOKEN") (xsd-token-type) ())
668 ;;; NMTOKENS
670 (defxsd (nmtokens-type "NMTOKENS") (nmtoken-type) ())
673 ;;; integer
675 (defxsd (integer-type "integer") (decimal-type) ())
678 ;;; nonPositiveInteger
680 (defxsd (non-positive-integer-type "nonPositiveInteger") (integer-type) ())
683 ;;; nonPositiveInteger
685 (defxsd (negative-integer-type "negativeInteger") (non-positive-integer-type)
689 ;;; long
691 (defxsd (long-type "long") (integer-type) ())
694 ;;; int
696 (defxsd (int-type "int") (long-type) ())
699 ;;; short
701 (defxsd (short-type "short") (int-type) ())
704 ;;; byte
706 (defxsd (bite-type "byte") (short-type) ())
709 ;;; nonNegativeInteger
711 (defxsd (non-negative-integer-type "nonNegativeInteger") (integer-type) ())
714 ;;; unsignedLong
716 (defxsd (unsigned-long-type "unsignedLong") (non-negative-integer-type) ())
719 ;;; unsignedInt
721 (defxsd (unsigned-int-type "unsignedInt") (unsigned-long-type) ())
724 ;;; unsignedShort
726 (defxsd (unsigned-short-type "unsignedShort") (unsigned-int-type) ())
729 ;;; unsignedByte
731 (defxsd (unsigned-byte-type "unsignedByte") (unsigned-short-type) ())
734 ;;; positiveInteger
736 (defxsd (positive-integer-type "positiveInteger") (non-negative-integer-type)