ENTITY
[cxml-rng.git] / types.lisp
blob712660ae519953b62994316dd362fc6e88bc09fa
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
2 ;;;
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8 ;;;
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11 ;;;
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 (in-package :cxml-types)
31 (defclass data-type () ()
32 (:documentation
33 "@short{The abstract superclass of all types.}
35 Each type belongs to a datatype library, named by a keyword. In each
36 library, the types are named by strings.
38 @see-constructor{find-type}
39 @see-slot{type-name}
40 @see-slot{type-library}
41 @see-slot{type-context-dependent-p}
42 @see{parse}
43 @see{equal-using-type}
44 @see{lessp-using-type}
45 @see{validp}"))
47 (defgeneric find-type (library name &key &allow-other-keys)
48 (:documentation
49 "@arg[library]{datatype library, a keyword symbol}
50 @arg[name]{the type's name, a string}
51 @arg[args]{type parameters, strings named by keyword arguments}
52 @return{an instance of @class{data-type}, or @code{nil}}
53 @short{Look up the type named @em{name} in datatype library @em{library}.}
55 Return a type instance for this type and the additional parameters,
56 or @code{nil} if the type does not exist.
58 Additional parameters (knows as facets in XSD) can be passed to specify
59 or restrict the type for the purposes of @fun{validp}.
61 @see{data-type}"))
63 (defgeneric type-library (type)
64 (:documentation
65 "@arg[type]{an instance of @class{data-type}}
66 @return{library name, a keyword}
67 @short{Return the name of the library this type belongs to.}
69 @see{type-name}
70 @see{type-context-dependent-p}"))
72 (defgeneric type-name (type)
73 (:documentation
74 "@arg[type]{an instance of @class{data-type}}
75 @return{type name, a string}
76 @short{Return the name this type has within its library.}
78 @see{type-library}
79 @see{type-context-dependent-p}"))
81 (defmethod find-type ((library t) name &key &allow-other-keys)
82 (declare (ignore name))
83 nil)
85 (defgeneric type-context-dependent-p (type)
86 (:documentation
87 "@arg[type]{an instance of @class{data-type}}
88 @return{a boolean}
89 @short{Return true if parsing and validation of values by this type
90 depends on the validation context.}
92 In this case, the optional @code{context} argument to @fun{parse} and
93 @fun{validp} is required, and an error will be signalled if it is missing.
95 @see{validation-context}
96 @see{type-name}
97 @see{type-library}
98 @see{type-context-dependent-p}"))
100 (defmethod type-context-dependent-p ((type data-type))
101 nil)
103 (defgeneric equal-using-type (type u v)
104 (:documentation
105 "@arg[type]{an instance of @class{data-type}}
106 @arg[u]{a parsed value as returned by @fun{parse}}
107 @arg[v]{a parsed value as returned by @fun{parse}}
108 @return{a boolean}
109 @short{Compare the @emph{values} @code{u} and @code{v} using a
110 data-type-dependent equality function.}
112 @see{validp}"))
114 (defgeneric parse (type e &optional context)
115 (:documentation
116 "@arg[type]{an instance of @class{data-type}}
117 @arg[e]{a string}
118 @arg[context]{an instance of @class{validation-context}}
119 @return{an object}
120 @short{Parse string @code{e} and return a representation of its value
121 as defined by the data type.}
123 The @code{context} argument is required if @fun{type-context-dependent-p}
124 is true for @code{type}, and will be ignored otherwise.
126 @see{equal-using-type}
127 @see{validp}"))
129 (defgeneric validp (type e &optional context)
130 (:documentation
131 "@arg[type]{an instance of @class{data-type}}
132 @arg[e]{a string}
133 @arg[context]{an instance of @class{validation-context}}
134 @return{a boolean}
135 @short{Determine whether a string is a valid lexical representation
136 for a type.}
138 The @code{context} argument is required if @fun{type-context-dependent-p}
139 is true for @code{type}, and will be ignored otherwise.
141 @see{parse}
142 @see{equal-using-type}"))
145 ;;; Validation context
147 (defclass validation-context () ()
148 (:documentation
149 "@short{This abstract class defines a protocol allowing data types
150 to query the XML parser about its current state.}
152 Some types are context dependent, as indicated by
153 @fun{type-context-dependent-p}. Those types need access to state
154 computed by the XML parser implicitly, like namespace bindings or
155 the Base URI.
157 User-defined subclasses must implement methods
158 for the functions @fun{context-find-namespace-binding} and
159 @fun{context-find-unparsed-entity}.
161 Two pre-defined validation context implementations are
162 provided, one for use with SAX, the other based on Klacks."))
164 (defgeneric context-find-namespace-binding (context prefix)
165 (:documentation
166 "@arg[context]{an instance of @class{validation-context}}
167 @arg[prefix]{name prefix, a string}
168 @return{the namespace URI as a string, or NIL}
169 @short{This function resolves a namespace prefix to a namespace URI in the
170 current context.}
171 All currently declared namespaces
172 are taken into account, including those declared directly on the
173 current element."))
175 (defgeneric context-find-unparsed-entity (context name)
176 (:documentation
177 "@arg[context]{an instance of @class{validation-context}}
178 @arg[name]{entity name, a string}
179 @return{@code{nil}, or a list of public id, system id, and notation name}
180 This function looks for an unparsed entity in the current context."))
182 (defclass klacks-validation-context (validation-context)
183 ((source :initarg :source :accessor context-source))
184 (:documentation
185 "A validation-context implementation that queries
186 a klacks source for information about the parser's current state.
187 @see-constructor{make-klacks-validation-context}"))
189 (defun make-klacks-validation-context (source)
190 "@arg[source]{a @a[http://common-lisp.net/project/cxml/klacks.html]{
191 klacks source}}
192 @return{a @class{klacks-validation-context}}
193 Create a validation-context that will query the given klacks source for
194 the current parser context."
195 (make-instance 'klacks-validation-context :source source))
197 (defmethod context-find-namespace-binding
198 ((context klacks-validation-context) prefix)
199 (klacks:find-namespace-binding prefix (context-source context)))
201 ;; zzz nicht schoen.
202 (defmethod context-find-unparsed-entity
203 ((context klacks-validation-context) name)
204 (or (dolist (x (slot-value (context-source context)
205 'cxml::external-declarations))
206 (when (and (eq (car x) 'sax:unparsed-entity-declaration)
207 (equal (cadr x) name))
208 (return t)))
209 (dolist (x (slot-value (context-source context)
210 'cxml::internal-declarations))
211 (when (and (eq (car x) 'sax:unparsed-entity-declaration)
212 (equal (cadr x) name))
213 (return t)))))
215 (defclass sax-validation-context-mixin (validation-context)
216 ((stack :initform nil :accessor context-stack)
217 (unparsed-entities :initform (make-hash-table :test 'equal)
218 :accessor unparsed-entities))
219 (:documentation
220 "@short{A class that implements validation-context as a mixin for
221 user-defined SAX handler classes.}
223 The mixin will record namespace information
224 automatically, and the user's SAX handler can simply be passed as a
225 validation context to data type functions."))
227 (defmethod sax:start-prefix-mapping
228 ((handler sax-validation-context-mixin) prefix uri)
229 (push (cons prefix uri) (context-stack handler)))
231 (defmethod sax:end-prefix-mapping
232 ((handler sax-validation-context-mixin) prefix)
233 (setf (context-stack handler)
234 (remove prefix
235 (context-stack handler)
236 :count 1
237 :key #'car
238 :test #'equal)))
240 (defmethod sax:unparsed-entity-declaration
241 ((context sax-validation-context-mixin)
242 name public-id system-id notation-name)
243 (setf (gethash name (unparsed-entities context))
244 (list public-id system-id notation-name)))
246 (defmethod context-find-namespace-binding
247 ((context sax-validation-context-mixin) prefix)
248 (cdr (assoc prefix (context-stack context) :test #'equal)))
250 (defmethod context-find-unparsed-entity
251 ((context sax-validation-context-mixin) name)
252 (gethash name (unparsed-entities context)))
255 ;;; Relax NG built-in type library
257 (defclass rng-type (data-type) ()
258 (:documentation
259 "@short{The class of Relax NG built-in types.}
260 Relax NG defines two built-in data type: string and token.
262 The Relax NG type library is named @code{:||}."))
264 (defclass string-type (rng-type) ()
265 (:documentation
266 "@short{The Relax NG 'string' type.}
267 This data type allows arbitrary strings and interprets them as-is.
269 For this type, @fun{parse} will return any string unchanged, and
270 @fun{equal-using-type} compares strings using @code{equal}."))
272 (defclass token-type (rng-type) ()
273 (:documentation
274 "@short{The Relax NG 'token' type.}
275 This data type allows arbitrary strings and normalizes all whitespaces.
277 For this type, @fun{parse} will return the string with leading and
278 trailing whitespace removed, and remaining sequences of spaces
279 compressed down to one space character each.
281 A method for @fun{equal-using-type} compares strings using @code{equal}."))
283 (defmethod type-library ((type rng-type))
284 :||)
286 (defvar *string-data-type* (make-instance 'string-type))
287 (defvar *token-data-type* (make-instance 'token-type))
289 (defmethod find-type ((library (eql :||)) name &rest args &key)
290 (cond
291 ((eq name :probe) t)
292 (args nil)
293 ((equal name "string") *string-data-type*)
294 ((equal name "token") *token-data-type*)
295 (t nil)))
297 (defmethod equal-using-type ((type rng-type) u v)
298 (equal u v))
300 (defmethod validp ((type rng-type) e &optional context)
301 (declare (ignore e context))
304 (defmethod type-name ((type string-type)) "string")
305 (defmethod type-name ((type token-type)) "token")
307 (defmethod parse ((type string-type) e &optional context)
308 (declare (ignore context))
311 (defmethod parse ((type token-type) e &optional context)
312 (declare (ignore context))
313 (normalize-whitespace e))
315 (eval-when (:compile-toplevel :load-toplevel :execute)
316 (defparameter *whitespace*
317 (format nil "~C~C~C~C"
318 (code-char 9)
319 (code-char 32)
320 (code-char 13)
321 (code-char 10))))
323 (defun normalize-whitespace (str)
324 (cl-ppcre:regex-replace-all #.(format nil "[~A]+" *whitespace*)
325 (string-trim *whitespace* str)
326 " "))
328 (defun replace-whitespace (str)
329 (cl-ppcre:regex-replace-all #.(format nil "[~A]" *whitespace*)
331 " "))
334 ;;; XML Schema Part 2: Datatypes Second Edition
336 (defparameter *xsd-types* (make-hash-table :test 'equal))
338 (defmacro defxsd
339 ((class-name type-name) (&rest supers) (&rest slots) &rest args)
340 `(progn
341 (setf (gethash ,type-name *xsd-types*) ',class-name)
342 (defclass ,class-name ,supers
343 ((type-name :initform ,type-name
344 :reader type-name
345 :allocation :class)
346 ,@slots)
347 ,@args)))
349 (defclass xsd-type (data-type)
350 ((pattern :initform nil :initarg :pattern :reader pattern)
351 (spec-pattern :initform nil :reader spec-pattern :allocation :class))
352 (:documentation
353 "@short{The class of XML Schema built-in types.}
355 Subclasses of xsd-type provide the built-in types of
356 @a[http://www.w3.org/TR/xmlschema-2/]{
357 XML Schema Part 2: Datatypes Second Edition}
358 as specified in @a[http://relaxng.org/xsd-20010907.html]{Guidelines for
359 using W3C XML Schema Datatypes with RELAX NG}.
361 The XSD type library
362 is named @code{:|http://www.w3.org/2001/XMLSchema-datatypes|}."))
364 (defmethod initialize-instance ((instance xsd-type)
365 &rest args
366 &key ((:|minLength| min-length))
367 ((:|maxLength| max-length))
368 ((:|length| exact-length)))
369 (apply #'call-next-method
370 instance
371 :min-length (when min-length
372 ;; fixme: richtigen fehler
373 (parse-integer min-length))
374 :max-length (when max-length
375 ;; fixme: richtigen fehler
376 (parse-integer max-length))
377 :exact-length (when exact-length
378 ;; fixme: richtigen fehler
379 (parse-integer exact-length))
380 args))
382 (defmethod type-library ((type xsd-type))
383 :|http://www.w3.org/2001/XMLSchema-datatypes|)
385 (defmethod find-type
386 ((library (eql :|http://www.w3.org/2001/XMLSchema-datatypes|))
387 name
388 &rest args &key)
389 args ;fixme
390 (if (eq name :probe)
392 (let ((class (gethash name *xsd-types*)))
393 (if class
394 (apply #'make-instance class args)
395 nil))))
397 (defgeneric parse/xsd (type e context))
399 (defgeneric validp/xsd (type v context)
400 (:method-combination and))
402 (defmethod validp/xsd and ((type xsd-type) v context)
403 (declare (ignore context))
404 ;; zzz
405 #+(or)
406 (and (or (null (pattern type))
407 (cl-ppcre:all-matches (pattern type) v))
408 (or (null (spec-pattern type))
409 (cl-ppcre:all-matches (spec-pattern type) v)))
412 (defmethod validp ((type xsd-type) e &optional context)
413 (not (eq :error (parse/xsd type e context))))
415 (defmethod parse ((type xsd-type) e &optional context)
416 (let ((result (parse/xsd type e context)))
417 (when (eq result :error)
418 (error "not valid for data type ~A: ~S" type e))
419 result))
421 ;; Handle the whiteSpace "facet" before the subclass sees it.
422 ;; If parsing succeded, check other facets by asking validp/xsd.
423 (defmethod parse/xsd :around ((type xsd-type) e context)
424 (let ((result (call-next-method type
425 (munge-whitespace type e)
426 context)))
427 (if (or (eq result :error) (validp/xsd type e context))
428 result
429 :error)))
431 (defgeneric munge-whitespace (type e))
433 (defmethod munge-whitespace ((type xsd-type) e)
434 (normalize-whitespace e))
437 ;;; ordering-mixin
439 (defclass ordering-mixin ()
440 ((min-exclusive :initform nil
441 :initarg :min-exclusive
442 :accessor min-exclusive)
443 (max-exclusive :initform nil
444 :initarg :max-exclusive
445 :accessor max-exclusive)
446 (min-inclusive :initform nil
447 :initarg :min-inclusive
448 :accessor min-inclusive)
449 (max-inclusive :initform nil
450 :initarg :max-inclusive
451 :accessor max-inclusive)))
453 (defgeneric lessp-using-type (type u v)
454 (:documentation
455 "@arg[type]{an ordered @class{data-type}}
456 @arg[u]{a parsed value as returned by @fun{parse}}
457 @arg[v]{a parsed value as returned by @fun{parse}}
458 @return{a boolean}
459 @short{Compare the @emph{values} @code{u} and @code{v} using a
460 data-type-dependent partial ordering.}
462 A method for this function is provided only by types that have a
463 natural partial ordering. The ordering is described in the
464 documentation for the type.
466 @see{equal-using-type}"))
468 (defun <-using-type (type u v)
469 (lessp-using-type type u v))
471 (defun <=-using-type (type u v)
472 (or (lessp-using-type type u v) (equal-using-type type u v)))
474 ;; it's only a partial ordering, so in general this is not the opposite of <=
475 (defun >-using-type (type u v)
476 (lessp-using-type type v u))
478 ;; it's only a partial ordering, so in general this is not the opposite of <
479 (defun >=-using-type (type u v)
480 (or (lessp-using-type type v u) (equal-using-type type v u)))
482 (defmethod validp/xsd and ((type ordering-mixin) v context)
483 (declare (ignore context))
484 (with-slots (min-exclusive max-exclusive min-inclusive max-inclusive) type
485 (and (or (null min-exclusive) (>-using-type type v min-exclusive))
486 (or (null max-exclusive) (<-using-type type v max-exclusive))
487 (or (null min-inclusive) (>=-using-type type v min-inclusive))
488 (or (null max-inclusive) (<=-using-type type v max-inclusive)))))
491 ;;; length-mixin
493 (defclass length-mixin ()
494 ((exact-length :initform nil :initarg :exact-length :accessor exact-length)
495 (min-length :initform nil :initarg :min-length :accessor min-length)
496 (max-length :initform nil :initarg :max-length :accessor max-length)))
498 ;; extra-hack fuer die "Laenge" eines QName...
499 (defgeneric length-using-type (type u))
500 (defmethod length-using-type ((type length-mixin) e) (length e))
502 (defmethod validp/xsd and ((type length-mixin) v context)
503 (declare (ignore context))
504 (with-slots (exact-length min-length max-length) type
505 (or (not (or exact-length min-length max-length))
506 (let ((l (length-using-type type v)))
507 (and (or (null exact-length) (eql l exact-length))
508 (or (null min-length) (>= l min-length))
509 (or (null max-length) (<= l max-length)))))))
512 ;;; enumeration-type
514 (defclass enumeration-type (xsd-type length-mixin)
515 ((word-type :reader word-type)))
517 (defmethod initialize-instance :after ((type enumeration-type) &key)
518 (setf (min-length type) (max* 1 (min-length type))))
520 (defmethod parse/xsd ((type enumeration-type) e context)
521 (let ((wt (word-type type)))
522 (loop
523 for word in (cl-ppcre:split " " e)
524 for v = (parse wt word context)
525 collect v
526 when (eq v :error) do (return :error))))
530 ;;;; Primitive types
532 ;;; duration
534 (defxsd (duration-type "duration") (xsd-type ordering-mixin) ())
536 (defmethod equal-using-type ((type duration-type) u v)
537 (equal u v))
539 ;; zzz das ist vielleicht ein bisschen zu woertlich implementiert
540 (defmethod lessp-using-type ((type duration-type) u v)
541 (let ((dt (make-instance 'date-time-type)))
542 (every (lambda (str)
543 (let ((s (parse dt str nil)))
544 (lessp-using-type dt
545 (datetime+duration s u)
546 (datetime+duration s v))))
547 '("1696-09-01T00:00:00Z"
548 "1697-02-01T00:00:00Z"
549 "1903-03-01T00:00:00Z"
550 "1903-07-01T00:00:00Z"))))
552 (defun datetime+duration (s d)
553 (destructuring-bind (syear smonth sday shour sminute ssecond szone) s
554 (destructuring-bind (dyear dmonth dday dhour dminute dsecond) d
555 (labels ((floor3 (a low high)
556 (multiple-value-bind (u v)
557 (floor (- a low) (- high low))
558 (values u (+ low v))))
559 (maximum-day-in-month-for (yearvalue monthvalue)
560 (multiple-value-bind (m y)
561 (floor3 monthvalue 1 13)
562 (day-limit m (+ yearvalue y)))))
563 (multiple-value-bind (carry emonth) (floor3 (+ smonth dmonth) 1 13)
564 (let ((eyear (+ syear dyear carry))
565 (ezone szone))
566 (multiple-value-bind (carry esecond) (floor (+ ssecond dsecond) 60)
567 (multiple-value-bind (carry eminute)
568 (floor (+ sminute dminute carry) 60)
569 (multiple-value-bind (carry ehour)
570 (floor (+ shour dhour carry) 24)
571 (let* ((mdimf (maximum-day-in-month-for eyear emonth))
572 (tmpdays (max 1 (min sday mdimf)))
573 (eday (+ tmpdays dday carry)))
574 (loop
575 (let* ((mdimf (maximum-day-in-month-for eyear emonth))
576 (carry
577 (cond
578 ((< eday 1)
579 (setf eday (+ eday mdimf))
581 ((> eday mdimf)
582 (setf eday (- eday mdimf))
585 (return))))
586 (tmp (+ emonth carry)))
587 (multiple-value-bind (y m)
588 (floor3 tmp 1 13)
589 (setf emonth m)
590 (incf eyear y))))
591 (list eyear emonth eday ehour eminute esecond
592 ezone)))))))))))
594 (defun scan-to-strings (&rest args)
595 (coerce (apply #'cl-ppcre:scan-to-strings args) 'list))
597 (defmethod parse/xsd ((type duration-type) e context)
598 (declare (ignore context))
599 (destructuring-bind (&optional minusp y m d tp h min s)
600 (scan-to-strings "(?x)
601 ^(-)? # minus
602 P(?:(\\d+)Y)? # years
603 (?:(\\d+)M)? # months
604 (?:(\\d+)D)? # days
605 (T # (time)
606 (?:(\\d+)H)? # hours
607 (?:(\\d+)M)? # minutes
608 (?:(\\d+(?:[.]\\d+)?)S)? # seconds
609 )?$"
611 (if (and (or y m d h min s)
612 (or (null tp) (or h min s)))
613 (let ((f (if minusp -1 1)))
614 (flet ((int (str)
615 (and str (* f (parse-integer str)))))
616 (list (int y) (int m) (int d) (int h) (int min)
617 (and s (* f (parse-number:parse-number s))))))
618 :error)))
621 ;;; dateTime
623 (defclass time-ordering-mixin (ordering-mixin) ())
625 (defxsd (date-time-type "dateTime") (xsd-type time-ordering-mixin) ())
627 (defmethod equal-using-type ((type time-ordering-mixin) u v)
628 (equal u v))
630 ;; add zone-offset as a duration (if any), but keep a boolean in the
631 ;; zone-offset field indicating whether there was a time-zone
632 (defun normalize-date-time (u)
633 (destructuring-bind (year month day hour minute second zone-offset) u
634 (let ((v (list year month day hour minute second (and zone-offset t))))
635 (if zone-offset
636 (multiple-value-bind (h m)
637 (truncate zone-offset)
638 (datetime+timezone v h (* m 100)))
639 v))))
641 (defun datetime+timezone (d h m)
642 (datetime+duration d (list 0 0 0 h m 0)))
644 (defmethod lessp-using-type ((type time-ordering-mixin) p q)
645 (destructuring-bind (pyear pmonth pday phour pminute psecond pzone)
646 (normalize-date-time p)
647 (destructuring-bind (qyear qmonth qday qhour qminute qsecond qzone)
648 (normalize-date-time q)
649 (cond
650 ((and pzone (not qzone))
651 (lessp-using-type type p (datetime+timezone q 14 0)))
652 ((and (not pzone) qzone)
653 (lessp-using-type type (datetime+timezone p -14 0) q))
655 ;; zzz hier sollen wir <> liefern bei Feldern, die in genau einer
656 ;; der Zeiten fehlen. Wir stellen aber fehlende Felder derzeit
657 ;; defaulted dar, koennen diese Situation also nicht feststellen.
658 ;; Einen Unterschied sollte das nur machen, wenn Werte verschiedener
659 ;; Datentypen miteinander verglichen werden. Das bieten wir einfach
660 ;; nicht an.
661 (loop
662 for a in (list pyear pmonth pday phour pminute psecond)
663 for b in (list qyear qmonth qday qhour qminute qsecond)
665 (when (< a b)
666 (return t))
667 (when (> a b)
668 (return nil))))))))
670 (defun day-limit (m y)
671 (cond
672 ((and (eql m 2)
673 (or (zerop (mod y 400))
674 (and (zerop (mod y 4))
675 (not (zerop (mod y 100))))))
677 ((eql m 2) 28)
678 ((oddp y) 31)
679 (t 30)))
681 (defmethod parse-time (minusp y m d h min s tz tz-sign tz-h tz-m
682 &key (start 0) end)
683 (declare (ignore start end)) ;zzz
684 ;; parse into numbers
685 (flet ((int (str)
686 (and str (parse-integer str)))
687 (num (str)
688 (and str (parse-number:parse-number str))))
689 (setf (values y m d h min s tz-h tz-m)
690 (values (* (int y) (if minusp -1 1))
691 (int m) (int d) (int h) (int min)
692 (num s)
693 (int tz-h) (int tz-m))))
694 (let ((day-limit (day-limit m y)))
695 ;; check ranges
696 (cond
697 ((and y
698 (plusp y)
699 (<= 1 m 12)
700 (<= 1 d day-limit)
701 (<= 0 h 24)
702 (<= 0 m 59)
703 ;; zzz sind leap seconds immer erlaubt?
704 (<= 0 s 60))
705 ;; 24:00:00 must be canonicalized
706 (when (and (eql h 24) (zerop min) (zerop s))
707 (incf h)
708 (incf d)
709 (when (> d day-limit)
710 (setf d 1)
711 (incf m)
712 (when (> m 12)
713 (incf y))))
714 (let ((tz-offset
715 (when tz-h
716 (* (if (equal tz-sign "-") -1 1)
717 (+ tz-h (/ tz-m 100))))))
718 (list (* y (if minusp -1 1)) m d h min s tz-offset)
719 ;; (subseq ... start end)
722 :error))))
724 (defmethod parse/xsd ((type date-time-type) e context)
725 (declare (ignore context))
726 (destructuring-bind (&optional minusp y m d h min s tz tz-sign tz-h tz-m)
727 (scan-to-strings "(?x)
728 ^(-)? # opt. minus
729 ((?:[1-9]\d*)?\d{4}) # year
730 -(\d\d) # month
731 -(\d\d) # day
732 T # (time)
733 (\d\d) # hour
734 -(\d\d) # minute
735 -(\d+(?:[.]\\d+)?) # second
736 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
739 (parse-time minusp y m d h min s tz tz-sign tz-h tz-m)))
742 ;;; time
744 (defxsd (time-type "time") (xsd-type time-ordering-mixin) ())
746 (defmethod parse/xsd ((type time-type) e context)
747 (declare (ignore context))
748 (destructuring-bind (&optional h min s tz tz-sign tz-h tz-m)
749 (scan-to-strings "(?x)
750 ^(\d\d) # hour
751 -(\d\d) # minute
752 -(\d+(?:[.]\\d+)?) # second
753 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
756 (parse-time nil 1 1 1 h min s tz tz-sign tz-h tz-m
757 :start 3)))
760 ;;; date
762 (defxsd (date-type "date") (xsd-type time-ordering-mixin) ())
764 (defmethod parse/xsd ((type date-type) e context)
765 (declare (ignore context))
766 (destructuring-bind (&optional minusp y m d tz tz-sign tz-h tz-m)
767 (scan-to-strings "(?x)
768 ^(-)? # opt. minus
769 ((?:[1-9]\d*)?\d{4}) # year
770 -(\d\d) # month
771 -(\d\d) # day
772 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
775 (parse-time minusp y m d 0 0 0 tz tz-sign tz-h tz-m
776 :end 3)))
779 ;;; gYearMonth
781 (defxsd (year-month-type "gYearMonth") (xsd-type time-ordering-mixin) ())
783 (defmethod parse/xsd ((type year-month-type) e context)
784 (declare (ignore context))
785 (destructuring-bind (&optional minusp y m)
786 (scan-to-strings "(?x)
787 ^(-)? # opt. minus
788 ((?:[1-9]\d*)?\d{4}) # year
789 -(\d\d) # month
792 (parse-time minusp y m 1 0 0 0 nil nil nil nil
793 :end 2)))
796 ;;; gYear
798 (defxsd (year-type "gYear") (xsd-type time-ordering-mixin) ())
800 (defmethod parse/xsd ((type year-month-type) e context)
801 (declare (ignore context))
802 (destructuring-bind (&optional minusp y tz tz-sign tz-h tz-m)
803 (scan-to-strings "(?x)
804 ^(-)? # opt. minus
805 ((?:[1-9]\d*)?\d{4}) # year
806 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
809 (parse-time minusp y 1 1 0 0 0 tz tz-sign tz-h tz-m
810 :end 1)))
813 ;;; gMonthDay
815 (defxsd (month-day-type "gMonthDay") (xsd-type time-ordering-mixin) ())
817 (defmethod parse/xsd ((type month-day-type) e context)
818 (declare (ignore context))
819 (destructuring-bind (&optional m d tz tz-sign tz-h tz-m)
820 (scan-to-strings "(?x)
821 ^--(\d\d) # month
822 -(\d\d) # day
823 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
826 (parse-time nil 1 m d 0 0 0 tz tz-sign tz-h tz-m
827 :start 1 :end 3)))
830 ;;; gDay
832 (defxsd (day-type "gDay") (xsd-type time-ordering-mixin) ())
834 (defmethod parse/xsd ((type day-type) e context)
835 (declare (ignore context))
836 (destructuring-bind (&optional d tz tz-sign tz-h tz-m)
837 (scan-to-strings "(?x)
838 ---(\d\d) # day
839 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
842 (parse-time nil 1 1 d 0 0 0 tz tz-sign tz-h tz-m
843 :start 3 :end 4)))
846 ;;; gMonth
848 (defxsd (month-type "gMonth") (xsd-type time-ordering-mixin) ())
850 (defmethod parse/xsd ((type month-type) e context)
851 (declare (ignore context))
852 (destructuring-bind (&optional m tz tz-sign tz-h tz-m)
853 (scan-to-strings "(?x)
854 ^--(\d\d) # month
855 (([+-])(\d\d):(\d\d)|Z)? # opt timezone
858 (parse-time nil 1 m 1 0 0 0 tz tz-sign tz-h tz-m
859 :start 2 :end 3)))
862 ;;; boolean
864 (defxsd (boolean-type "boolean") (xsd-type) ())
866 (defmethod parse/xsd ((type boolean-type) e context)
867 (declare (ignore context))
868 (case (find-symbol e :keyword)
869 ((:|true| :|1|) t)
870 ((:|false| :|0|) nil)))
873 ;;; base64Binary
875 (defxsd (base64-binary-type "base64Binary") (xsd-type length-mixin) ())
877 (defmethod equal-using-type ((type base64-binary-type) u v)
878 (equalp u v))
880 (defmethod parse/xsd ((type base64-binary-type) e context)
881 (declare (ignore context))
882 (if (cl-ppcre:all-matches
883 "(?x)
884 ^(([A-Za-z0-9+/][ ]?[A-Za-z0-9+/][ ]?[A-Za-z0-9+/]
885 [ ]?[A-Za-z0-9+/][ ]?)*
886 (([A-Za-z0-9+/][ ]?[A-Za-z0-9+/][ ]?[A-Za-z0-9+/][ ]?[A-Za-z0-9+/])
887 | ([A-Za-z0-9+/][ ]?[A-Za-z0-9+/][ ]?[AEIMQUYcgkosw048][ ]?=)
888 | ([A-Za-z0-9+/][ ]?[AQgw][ ]?=[ ]?=)))?$"
890 (handler-case
891 (cl-base64:base64-string-to-usb8-array e)
892 (warning (c)
893 (error "unexpected failure in Base64 decoding: ~A" c)))
894 :error))
897 ;;; hexBinary
899 (defxsd (hex-binary-type "hexBinary") (xsd-type length-mixin) ())
901 (defmethod equal-using-type ((type hex-binary-type) u v)
902 (equalp u v))
904 (defmethod parse/xsd ((type hex-binary-type) e context)
905 (declare (ignore context))
906 (if (evenp (length e))
907 (let ((result
908 (make-array (/ (length e) 2) :element-type '(unsigned-byte 8))))
909 (loop
910 for i from 0 below (length e) by 2
911 for j from 0
913 (setf (elt result j)
914 (handler-case
915 (parse-integer e :start i :end (+ i 2) :radix 16)
916 (error ()
917 (return :error))))
918 finally (return result)))
919 :error))
922 ;;; float
924 (defxsd (float-type "float") (xsd-type ordering-mixin) ())
926 (defmethod equal-using-type ((type float-type) u v)
927 (= u v))
929 (defmethod lessp-using-type ((type float-type) u v)
930 (< u v))
932 ;; zzz nehme hier an, dass single-float in IEEE single float ist.
933 ;; Das stimmt unter LispWorks bestimmt wieder nicht.
934 (defmethod parse/xsd ((type float-type) e context)
935 (declare (ignore context))
936 (if (cl-ppcre:all-matches "^[+-]\d+([.]\d+)?([eE][+-]\d+)?$" e)
937 (coerce (parse-number:parse-number e) 'single-float)
938 :error))
941 ;;; decimal
943 (defxsd (decimal-type "decimal") (xsd-type ordering-mixin)
944 ((fraction-digits :initform nil
945 :initarg :fraction-digits
946 :accessor fraction-digits)
947 (total-digits :initform nil
948 :initarg :total-digits
949 :accessor total-digits)))
951 (defmethod lessp-using-type ((type decimal-type) u v)
952 (< u v))
954 (defmethod equal-using-type ((type decimal-type) u v)
955 (= u v))
957 (defmethod validp/xsd and ((type decimal-type) v context)
958 (declare (ignore context))
959 (with-slots (fraction-digits total-digits) type
960 (and (or (null fraction-digits)
961 (let* ((betrag (abs v))
962 (fraction (- betrag (truncate betrag)))
963 (scaled (* fraction (expt 10 fraction-digits))))
964 (zerop (mod scaled 1))))
965 (or (null total-digits)
966 (let ((scaled (abs v)))
967 (loop
968 until (zerop (mod scaled 1))
969 do (setf scaled (* scaled 10)))
970 (< scaled (expt 10 total-digits)))))))
972 (defmethod parse/xsd ((type decimal-type) e context)
973 (declare (ignore context))
974 (destructuring-bind (&optional a b)
975 (scan-to-strings "^([+-]\d+)(?:[.](\d+))?$" e)
976 (if a
977 (+ (parse-integer a)
978 (/ (parse-integer b) (expt 10 (length b))))
979 :error)))
982 ;;; double
984 (defxsd (double-type "double") (xsd-type ordering-mixin) ())
986 (defmethod equal-using-type ((type double-type) u v)
987 (= u v))
989 (defmethod lessp-using-type ((type double-type) u v)
990 (< u v))
992 ;; zzz nehme hier an, dass double-float in IEEE double float ist.
993 ;; Auch das ist nicht garantiert.
994 (defmethod parse/xsd ((type double-type) e context)
995 (declare (ignore context))
996 (if (cl-ppcre:all-matches "^[+-]\d+([.]\d+)?([eE][+-]\d+)?$" e)
997 (coerce (parse-number:parse-number e) 'double-float)
998 :error))
1001 ;;; AnyURi
1003 (defxsd (any-uri-type "anyURI") (xsd-type length-mixin) ())
1005 (defmethod equal-using-type ((type any-uri-type) u v)
1006 (equal u v))
1008 (defmethod parse/xsd ((type any-uri-type) e context)
1009 (cxml-rng::escape-uri e))
1012 ;;; QName
1013 ;;; NOTATION
1015 (defclass qname-like (xsd-type length-mixin) ())
1017 (defxsd (qname-type "QName") (qname-like) ())
1018 (defxsd (notation-type "NOTATION") (qname-like) ())
1020 (defstruct (qname (:constructor make-qname (uri lname length)))
1022 lname
1023 length)
1025 (defmethod length-using-type ((type qname-like) e)
1026 (qname-length e))
1028 (defmethod equal-using-type ((type qname-like) u v)
1029 (and (equal (qname-uri u) (qname-uri v))
1030 (equal (qname-lname u) (qname-lname v))))
1032 (defun namep (str)
1033 (and (not (zerop (length str)))
1034 (cxml::name-start-rune-p (elt str 0))
1035 (every #'cxml::name-rune-p str)))
1037 (defmethod parse/xsd ((type qname-like) e context)
1038 (handler-case
1039 (if (namep e)
1040 (multiple-value-bind (prefix local-name) (cxml::split-qname e)
1041 (let ((uri (when prefix
1042 (context-find-namespace-binding context prefix))))
1043 (if (and prefix (not uri))
1044 :error
1045 (make-qname uri local-name (length e)))))
1046 :error)
1047 (cxml:well-formedness-violation ()
1048 :error)))
1051 ;;; string
1053 (defxsd (xsd-string-type "string") (xsd-type length-mixin) ())
1055 (defmethod equal-using-type ((type xsd-string-type) u v)
1056 (equal u v))
1058 (defmethod munge-whitespace ((type xsd-string-type) e)
1061 (defmethod parse/xsd ((type xsd-string-type) e context)
1065 ;;;;
1066 ;;;; Derived types
1067 ;;;;
1069 ;;; normalizedString
1071 (defxsd (normalized-string-type "normalizedString") (xsd-string-type) ())
1073 (defmethod munge-whitespace ((type normalized-string-type) e)
1074 (replace-whitespace e))
1077 ;;; token
1079 (defxsd (xsd-token-type "token") (normalized-string-type) ())
1081 (defmethod munge-whitespace ((type xsd-token-type) e)
1082 (normalize-whitespace e))
1085 ;;; language
1087 (defxsd (language-type "language") (xsd-token-type)
1088 ((spec-pattern :initform "[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*"
1089 :reader spec-pattern
1090 :allocation :class)))
1093 ;;; Name
1095 (defxsd (name-type "Name") (xsd-token-type)
1096 ((spec-pattern :initform "\\i\\c*"
1097 :reader spec-pattern
1098 :allocation :class)))
1101 ;;; NCName
1103 (defxsd (ncname-type "NCName") (name-type)
1104 ((spec-pattern :initform "[\\i-[:]][\\c-[:]]*"
1105 :reader spec-pattern
1106 :allocation :class)))
1108 (defmethod equal-using-type ((type ncname-type) u v)
1109 (equal u v))
1111 (defun nc-name-p (str)
1112 (and (namep str) (cxml::nc-name-p str)))
1114 (defmethod parse/xsd ((type ncname-type) e context)
1115 ;; zzz mit pattern machen
1116 (if (nc-name-p e)
1118 :error))
1120 ;;; ID
1122 (defxsd (id-type "ID") (ncname-type) ())
1125 ;;; IDREF
1127 (defxsd (idref-type "IDREF") (id-type) ())
1130 ;;; IDREFS
1132 (defxsd (idrefs-type "IDREFS") (enumeration-type)
1133 ((word-type :initform (make-instance 'idref-type))))
1136 ;;; ENTITY
1138 (defxsd (entity-type "ENTITY") (ncname-type) ())
1140 (defmethod parse/xsd ((type entity-type) e context)
1141 (if (context-find-unparsed-entity context e)
1143 :error))
1146 ;;; ENTITIES
1148 (defxsd (entities-type "ENTITIES") (enumeration-type)
1149 ((word-type :initform (make-instance 'entity-type))))
1152 ;;; NMTOKEN
1154 (defxsd (nmtoken-type "NMTOKEN") (xsd-token-type)
1155 ((spec-pattern :initform "\\c+"
1156 :reader spec-pattern
1157 :allocation :class)))
1160 ;;; NMTOKENS
1162 (defxsd (nmtokens-type "NMTOKENS") (enumeration-type)
1163 ((word-type :initform (make-instance 'nmtoken-type))))
1166 ;;; integer
1168 (defxsd (integer-type "integer") (decimal-type) ())
1170 ;; period is forbidden, so there's no point in letting decimal handle parsing
1171 ;; fixme: sind fuehrende nullen nun erlaubt oder nicht? die spec sagt ja,
1172 ;; das pattern im schema nicht.
1173 (defmethod parse/xsd ((type integer-type) e context)
1174 (declare (ignore context))
1175 (if (cl-ppcre:all-matches "^[+-][1-9]\d*$" e)
1176 (parse-number:parse-number e)
1177 :error))
1180 ;;; nonPositiveInteger
1182 (defxsd (non-positive-integer-type "nonPositiveInteger") (integer-type) ())
1184 (defun min* (a b)
1185 (cond
1186 ((null a) b)
1187 ((null b) a)
1188 (t (min a b))))
1190 (defun max* (a b)
1191 (cond
1192 ((null a) b)
1193 ((null b) a)
1194 (t (max a b))))
1196 (defmethod initialize-instance :after ((type non-positive-integer-type) &key)
1197 (setf (max-inclusive type)
1198 (min* 0 (max-inclusive type))))
1201 ;;; nonPositiveInteger
1203 (defxsd (negative-integer-type "negativeInteger") (non-positive-integer-type)
1206 (defmethod initialize-instance :after ((type negative-integer-type) &key)
1207 (setf (max-inclusive type)
1208 (min* -1 (max-inclusive type))))
1211 ;;; long
1213 (defxsd (long-type "long") (integer-type) ())
1215 (defmethod initialize-instance :after ((type long-type) &key)
1216 (setf (max-inclusive type) (min* 9223372036854775807 (max-inclusive type)))
1217 (setf (min-inclusive type) (max* -9223372036854775808 (min-inclusive type))))
1220 ;;; int
1222 (defxsd (int-type "int") (long-type) ())
1224 (defmethod initialize-instance :after ((type int-type) &key)
1225 (setf (max-inclusive type) (min* 2147483647 (max-inclusive type)))
1226 (setf (min-inclusive type) (max* -2147483648 (min-inclusive type))))
1229 ;;; short
1231 (defxsd (short-type "short") (int-type) ())
1233 (defmethod initialize-instance :after ((type short-type) &key)
1234 (setf (max-inclusive type) (min* 32767 (max-inclusive type)))
1235 (setf (min-inclusive type) (max* -32768 (min-inclusive type))))
1238 ;;; byte
1240 (defxsd (byte-type "byte") (short-type) ())
1242 (defmethod initialize-instance :after ((type byte-type) &key)
1243 (setf (max-inclusive type) (min* 127 (max-inclusive type)))
1244 (setf (min-inclusive type) (max* -128 (min-inclusive type))))
1247 ;;; nonNegativeInteger
1249 (defxsd (non-negative-integer-type "nonNegativeInteger") (integer-type) ())
1251 (defmethod initialize-instance :after ((type non-negative-integer-type) &key)
1252 (setf (min-inclusive type) (max* 0 (min-inclusive type))))
1255 ;;; unsignedLong
1257 (defxsd (unsigned-long-type "unsignedLong") (non-negative-integer-type) ())
1259 (defmethod initialize-instance :after ((type unsigned-long-type) &key)
1260 (setf (max-inclusive type) (min* 18446744073709551615 (max-inclusive type))))
1263 ;;; unsignedInt
1265 (defxsd (unsigned-int-type "unsignedInt") (unsigned-long-type) ())
1267 (defmethod initialize-instance :after ((type unsigned-int-type) &key)
1268 (setf (max-inclusive type) (min* 4294967295 (max-inclusive type))))
1271 ;;; unsignedShort
1273 (defxsd (unsigned-short-type "unsignedShort") (unsigned-int-type) ())
1275 (defmethod initialize-instance :after ((type unsigned-short-type) &key)
1276 (setf (max-inclusive type) (min* 65535 (max-inclusive type))))
1279 ;;; unsignedByte
1281 (defxsd (unsigned-byte-type "unsignedByte") (unsigned-short-type) ())
1283 (defmethod initialize-instance :after ((type unsigned-byte-type) &key)
1284 (setf (max-inclusive type) (min* 255 (max-inclusive type))))
1287 ;;; positiveInteger
1289 (defxsd (positive-integer-type "positiveInteger") (non-negative-integer-type)
1292 (defmethod initialize-instance :after ((type positive-integer-type) &key)
1293 (setf (min-inclusive type) (max* 1 (min-inclusive type))))