new release
[cxml/s11.git] / klacks / klacks-impl.lisp
blobf785bce6b28e1493f03699706a454d7b151e2235
1 ;;; -*- Mode: Lisp; readtable: runes; -*-
2 ;;; (c) copyright 2007 David Lichteblau
4 ;;; This library is free software; you can redistribute it and/or
5 ;;; modify it under the terms of the GNU Library General Public
6 ;;; License as published by the Free Software Foundation; either
7 ;;; version 2 of the License, or (at your option) any later version.
8 ;;;
9 ;;; This library is distributed in the hope that it will be useful,
10 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Library General Public License for more details.
13 ;;;
14 ;;; You should have received a copy of the GNU Library General Public
15 ;;; License along with this library; if not, write to the
16 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ;;; Boston, MA 02111-1307 USA.
19 (in-package :cxml)
21 (defclass cxml-source (klacks:source)
22 (;; args to make-source
23 (context :initarg :context)
24 (validate :initarg :validate)
25 (root :initarg :root)
26 (dtd :initarg :dtd)
27 (error-culprit :initarg :error-culprit)
28 ;; current state
29 (continuation)
30 (current-key :initform nil)
31 (current-values)
32 (current-attributes)
33 (cdata-section-p :reader klacks:current-cdata-section-p)
34 ;; extra WITH-SOURCE magic
35 (data-behaviour :initform :DTD)
36 (namespace-stack :initform (list *initial-namespace-bindings*))
37 (current-namespace-declarations)
38 (temporary-streams :initform nil)
39 (scratch-pad :initarg :scratch-pad)
40 (scratch-pad-2 :initarg :scratch-pad-2)
41 (scratch-pad-3 :initarg :scratch-pad-3)
42 (scratch-pad-4 :initarg :scratch-pad-4)))
44 (defmethod klacks:close-source ((source cxml-source))
45 (dolist (xstream (slot-value source 'temporary-streams))
46 ;; fixme: error handling?
47 (close-xstream xstream)))
49 (defmacro with-source ((source &rest slots) &body body)
50 (let ((s (gensym)))
51 `(let* ((,s ,source)
52 (*ctx* (slot-value ,s 'context))
53 (*validate* (slot-value ,s 'validate))
54 (*data-behaviour* (slot-value source 'data-behaviour))
55 (*namespace-bindings* (car (slot-value source 'namespace-stack)))
56 (*scratch-pad* (slot-value source 'scratch-pad))
57 (*scratch-pad-2* (slot-value source 'scratch-pad-2))
58 (*scratch-pad-3* (slot-value source 'scratch-pad-3))
59 (*scratch-pad-4* (slot-value source 'scratch-pad-4)))
60 (handler-case
61 (with-slots (,@slots) ,s
62 ,@body)
63 (runes-encoding:encoding-error (c)
64 (wf-error (slot-value ,s 'error-culprit) "~A" c))))))
66 (defun fill-source (source)
67 (with-slots (current-key current-values continuation) source
68 (unless current-key
69 (setf current-key :bogus)
70 (setf continuation (funcall continuation))
71 (assert (not (eq current-key :bogus))))))
73 (defmethod klacks:peek ((source cxml-source))
74 (with-source (source current-key current-values)
75 (fill-source source)
76 (apply #'values current-key current-values)))
78 (defmethod klacks:peek-value ((source cxml-source))
79 (with-source (source current-key current-values)
80 (fill-source source)
81 (apply #'values current-values)))
83 (defmethod klacks:peek-next ((source cxml-source))
84 (with-source (source current-key current-values)
85 (setf current-key nil)
86 (fill-source source)
87 (apply #'values current-key current-values)))
89 (defmethod klacks:consume ((source cxml-source))
90 (with-source (source current-key current-values)
91 (fill-source source)
92 (multiple-value-prog1
93 (apply #'values current-key current-values)
94 (setf current-key nil))))
96 (defmethod klacks:map-attributes (fn (source cxml-source))
97 (dolist (a (slot-value source 'current-attributes))
98 (funcall fn
99 (sax:attribute-namespace-uri a)
100 (sax:attribute-local-name a)
101 (sax:attribute-qname a)
102 (sax:attribute-value a)
103 (sax:attribute-specified-p a))))
105 (defmethod klacks:get-attribute
106 ((source cxml-source) lname &optional uri)
107 (dolist (a (slot-value source 'current-attributes))
108 (when (and (equal (sax:attribute-local-name a) lname)
109 (equal (sax:attribute-namespace-uri a) uri))
110 (return (sax:attribute-value a)))))
112 (defmethod klacks:list-attributes ((source cxml-source))
113 (slot-value source 'current-attributes))
115 (defun make-source
116 (input &rest args
117 &key validate dtd root entity-resolver disallow-internal-subset
118 (buffering t) pathname)
119 (declare (ignore validate dtd root entity-resolver disallow-internal-subset))
120 (etypecase input
121 (xstream
122 (when (and (not buffering) (< 1 (runes::xstream-speed input)))
123 (warn "make-source called with !buffering, but xstream is buffering"))
124 (let ((*ctx* nil))
125 (let ((zstream (make-zstream :input-stack (list input))))
126 (peek-rune input)
127 (with-scratch-pads ()
128 (apply #'%make-source
129 zstream
130 (loop
131 for (name value) on args by #'cddr
132 unless (member name '(:pathname :buffering))
133 append (list name value)))))))
134 (stream
135 (let ((xstream (make-xstream input :speed (if buffering 8192 1))))
136 (setf (xstream-name xstream)
137 (make-stream-name
138 :entity-name "main document"
139 :entity-kind :main
140 :uri (pathname-to-uri
141 (merge-pathnames (or pathname (pathname input))))))
142 (apply #'make-source xstream args)))
143 (pathname
144 (let* ((xstream
145 (make-xstream (open input :element-type '(unsigned-byte 8))
146 :speed (if buffering 8192 1))))
147 (setf (xstream-name xstream)
148 (make-stream-name
149 :entity-name "main document"
150 :entity-kind :main
151 :uri (pathname-to-uri (merge-pathnames input))))
152 (let ((source (apply #'make-source
153 xstream
154 :pathname input
155 args)))
156 (push xstream (slot-value source 'temporary-streams))
157 source)))
158 (rod
159 (let ((xstream (string->xstream input)))
160 (setf (xstream-name xstream)
161 (make-stream-name
162 :entity-name "main document"
163 :entity-kind :main
164 :uri nil))
165 (apply #'make-source xstream args)))
166 (array
167 (make-source (cxml::make-octet-input-stream input)))))
169 (defun %make-source
170 (input &key validate dtd root entity-resolver disallow-internal-subset
171 error-culprit)
172 ;; check types of user-supplied arguments for better error messages:
173 (check-type validate boolean)
174 (check-type dtd (or null extid))
175 (check-type root (or null rod))
176 (check-type entity-resolver (or null function symbol))
177 (check-type disallow-internal-subset boolean)
178 (let* ((xstream (car (zstream-input-stack input)))
179 (name (xstream-name xstream))
180 (base (when name (stream-name-uri name)))
181 (context
182 (make-context :main-zstream input
183 :entity-resolver entity-resolver
184 :base-stack (list (or base ""))
185 :disallow-internal-subset disallow-internal-subset))
186 (source
187 (make-instance 'cxml-source
188 :context context
189 :validate validate
190 :dtd dtd
191 :root root
192 :error-culprit error-culprit
193 :scratch-pad *scratch-pad*
194 :scratch-pad-2 *scratch-pad-2*
195 :scratch-pad-3 *scratch-pad-3*
196 :scratch-pad-4 *scratch-pad-4*)))
197 (setf (handler context) (make-instance 'klacks-dtd-handler :source source))
198 (setf (slot-value source 'continuation)
199 (lambda () (klacks/xmldecl source input)))
200 source))
202 (defun klacks/xmldecl (source input)
203 (with-source (source current-key current-values)
204 (let ((hd (p/xmldecl input)))
205 (setf current-key :start-document)
206 (setf current-values
207 (when hd
208 (list (xml-header-version hd)
209 (xml-header-encoding hd)
210 (xml-header-standalone-p hd))))
211 (lambda ()
212 (klacks/misc*-2 source input
213 (lambda ()
214 (klacks/doctype source input)))))))
216 (defun klacks/misc*-2 (source input successor)
217 (with-source (source current-key current-values)
218 (multiple-value-bind (cat sem) (peek-token input)
219 (case cat
220 (:COMMENT
221 (setf current-key :comment)
222 (setf current-values (list sem))
223 (consume-token input)
224 (lambda () (klacks/misc*-2 source input successor)))
225 (:PI
226 (setf current-key :processing-instruction)
227 (setf current-values (list (car sem) (cdr sem)))
228 (consume-token input)
229 (lambda () (klacks/misc*-2 source input successor)))
231 (consume-token input)
232 (klacks/misc*-2 source input successor))
234 (funcall successor))))))
236 (defun klacks/doctype (source input)
237 (with-source (source current-key current-values validate dtd)
238 (let ((cont (lambda () (klacks/finish-doctype source input)))
240 (prog1
241 (cond
242 ((eq (peek-token input) :<!DOCTYPE)
243 (setf l (cdr (p/doctype-decl input dtd)))
244 (lambda () (klacks/misc*-2 source input cont)))
245 (dtd
246 (setf l (cdr (synthesize-doctype dtd input)))
247 cont)
248 ((and validate (not dtd))
249 (validity-error "invalid document: no doctype"))
251 (return-from klacks/doctype
252 (funcall cont))))
253 (destructuring-bind (&optional name extid) l
254 (setf current-key :dtd)
255 (setf current-values
256 (list name
257 (and extid (extid-public extid))
258 (and extid (extid-system extid)))))))))
260 (defun klacks/finish-doctype (source input)
261 (with-source (source current-key current-values root data-behaviour)
262 (ensure-dtd)
263 (when root
264 (setf (model-stack *ctx*) (list (make-root-model root))))
265 (setf data-behaviour :DOC)
266 (setf *data-behaviour* :DOC)
267 (fix-seen-< input)
268 (let* ((final
269 (lambda ()
270 (klacks/eof source input)))
271 (next
272 (lambda ()
273 (setf data-behaviour :DTD)
274 (setf *data-behaviour* :DTD)
275 (klacks/misc*-2 source input final))))
276 (klacks/element source input next))))
278 (defun klacks/eof (source input)
279 (with-source (source current-key current-values)
280 (p/eof input)
281 (klacks:close-source source)
282 (setf current-key :end-document)
283 (setf current-values nil)
284 (lambda () (klacks/nil source))))
286 (defun klacks/nil (source)
287 (with-source (source current-key current-values)
288 (setf current-key nil)
289 (setf current-values nil)
290 (labels ((klacks/done ()
291 (setf current-key nil)
292 (setf current-values nil)
293 #'klacks/done))
294 #'klacks/done)))
296 (defun klacks/element (source input cont)
297 (with-source (source current-key current-values current-attributes
298 current-namespace-declarations)
299 (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
300 (setf current-key :start-element)
301 (setf current-values (list uri lname qname))
302 (setf current-attributes attrs)
303 (setf current-namespace-declarations new-b)
304 (if (eq cat :stag)
305 (lambda ()
306 (klacks/element-2 source input n-b cont))
307 (lambda ()
308 (klacks/ztag source cont))))))
310 (defun klacks/ztag (source cont)
311 (with-source (source current-key current-values current-attributes)
312 (setf current-key :end-element)
313 (setf current-attributes nil)
314 (validate-end-element *ctx* (third current-values))
315 cont))
317 (defun klacks/element-2 (source input n-b cont)
318 (with-source (source
319 current-key current-values current-attributes namespace-stack
320 current-namespace-declarations)
321 (let ((values* current-values)
322 (new-b current-namespace-declarations))
323 (setf current-attributes nil)
324 (push n-b namespace-stack)
325 (let ((finish
326 (lambda ()
327 (setf current-namespace-declarations new-b)
328 (klacks/element-3 source input values* cont))))
329 (klacks/content source input finish)))))
331 (defun klacks/element-3 (source input tag-values cont)
332 (with-source (source current-key current-values current-attributes)
333 (setf current-key :end-element)
334 (setf current-values tag-values)
335 (let ((qname (third tag-values)))
336 (p/etag input qname)
337 (validate-end-element *ctx* qname))
338 cont))
340 (defun klacks/content (source input cont)
341 (with-source (source current-key current-values cdata-section-p)
342 (let ((recurse (lambda () (klacks/content source input cont))))
343 (multiple-value-bind (cat sem) (peek-token input)
344 (case cat
345 ((:stag :ztag)
346 (klacks/element source input recurse))
347 ((:CDATA)
348 (process-characters input sem)
349 (setf current-key :characters)
350 (setf current-values (list sem))
351 (setf cdata-section-p nil)
352 recurse)
353 ((:ENTITY-REF)
354 (let ((name sem))
355 (consume-token input)
356 (klacks/entity-reference source input name recurse)))
357 ((:<!\[)
358 (setf current-key :characters)
359 (setf current-values (list (process-cdata-section input)))
360 (setf cdata-section-p t)
361 recurse)
362 ((:PI)
363 (setf current-key :processing-instruction)
364 (setf current-values (list (car sem) (cdr sem)))
365 (consume-token input)
366 recurse)
367 ((:COMMENT)
368 (setf current-key :comment)
369 (setf current-values (list sem))
370 (consume-token input)
371 recurse)
372 (otherwise
373 (funcall cont)))))))
375 (defun klacks/entity-reference (source zstream name cont)
376 (assert (not (zstream-token-category zstream)))
377 (with-source (source temporary-streams context)
378 (let ((new-xstream (entity->xstream zstream name :general nil)))
379 (push new-xstream temporary-streams)
380 (push :stop (zstream-input-stack zstream))
381 (zstream-push new-xstream zstream)
382 (push (stream-name-uri (xstream-name new-xstream)) (base-stack context))
383 (let ((next
384 (lambda ()
385 (klacks/entity-reference-2 source zstream new-xstream cont))))
386 (etypecase (checked-get-entdef name :general)
387 (internal-entdef
388 (klacks/content source zstream next))
389 (external-entdef
390 (klacks/ext-parsed-ent source zstream next)))))))
392 (defun klacks/entity-reference-2 (source zstream new-xstream cont)
393 (with-source (source temporary-streams context)
394 (unless (eq (peek-token zstream) :eof)
395 (wf-error zstream "Trailing garbage. - ~S" (peek-token zstream)))
396 (assert (eq (peek-token zstream) :eof))
397 (assert (eq (pop (zstream-input-stack zstream)) new-xstream))
398 (assert (eq (pop (zstream-input-stack zstream)) :stop))
399 (pop (base-stack context))
400 (setf (zstream-token-category zstream) nil)
401 (setf temporary-streams (remove new-xstream temporary-streams))
402 (close-xstream new-xstream)
403 (funcall cont)))
405 (defun klacks/ext-parsed-ent (source input cont)
406 (with-source (source)
407 (when (eq (peek-token input) :xml-decl)
408 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
409 (setup-encoding input hd))
410 (consume-token input))
411 (set-full-speed input)
412 (klacks/content source input cont)))
415 ;;;; terrible kludges
417 (defclass klacks-dtd-handler ()
418 ((handler-source :initarg :source :reader handler-source)
419 (internal-subset-p :initform nil :accessor handler-internal-subset-p)))
421 (defmethod sax:start-internal-subset ((handler klacks-dtd-handler))
422 (setf (slot-value (handler-source handler) 'internal-declarations) '())
423 (setf (handler-internal-subset-p handler) t))
425 (defmethod sax:end-internal-subset ((handler klacks-dtd-handler))
426 (setf (handler-internal-subset-p handler) nil))
428 (defmethod sax:entity-resolver ((handler klacks-dtd-handler) fn)
429 (setf (slot-value (handler-source handler) 'dom-impl-entity-resolver) fn))
431 (defmethod sax::dtd ((handler klacks-dtd-handler) dtd)
432 (setf (slot-value (handler-source handler) 'dom-impl-dtd) dtd))
434 (defmethod sax:end-dtd ((handler klacks-dtd-handler))
435 (let ((source (handler-source handler)))
436 (when (slot-boundp source 'internal-declarations)
437 (setf (slot-value source 'internal-declarations)
438 (reverse (slot-value source 'internal-declarations)))
439 (setf (slot-value source 'external-declarations)
440 (reverse (slot-value source 'external-declarations))))))
442 (macrolet
443 ((defhandler (name &rest args)
444 `(defmethod ,name ((handler klacks-dtd-handler) ,@args)
445 (let ((source (handler-source handler))
446 (spec (list ',name ,@args)))
447 (if (handler-internal-subset-p handler)
448 (push spec (slot-value source 'internal-declarations))
449 (push spec (slot-value source 'external-declarations)))))))
450 (defhandler sax:unparsed-entity-declaration
451 name public-id system-id notation-name)
452 (defhandler sax:external-entity-declaration
453 kind name public-id system-id)
454 (defhandler sax:internal-entity-declaration
455 kind name value)
456 (defhandler sax:notation-declaration
457 name public-id system-id)
458 (defhandler sax:element-declaration
459 name model)
460 (defhandler sax:attribute-declaration
461 element-name attribute-name type default))
464 ;;;; locator
466 (defun source-xstream (source)
467 (car (zstream-input-stack (main-zstream (slot-value source 'context)))))
469 (defun source-stream-name (source)
470 (let ((xstream (source-xstream source)))
471 (if xstream
472 (xstream-name xstream)
473 nil)))
475 (defmethod klacks:current-line-number ((source cxml-source))
476 (let ((x (source-xstream source)))
477 (if x
478 (xstream-line-number x)
479 nil)))
481 (defmethod klacks:current-column-number ((source cxml-source))
482 (let ((x (source-xstream source)))
483 (if x
484 (xstream-column-number x)
485 nil)))
487 (defmethod klacks:current-system-id ((source cxml-source))
488 (let ((name (source-stream-name source)))
489 (if name
490 (stream-name-uri name)
491 nil)))
493 (defmethod klacks:current-xml-base ((source cxml-source))
494 (car (base-stack (slot-value source 'context))))
496 (defmethod klacks:map-current-namespace-declarations (fn (source cxml-source))
497 (loop
498 for (prefix . uri) in (slot-value source 'current-namespace-declarations)
500 (funcall fn prefix uri)))
502 (defmethod klacks:find-namespace-binding (prefix (source cxml-source))
503 (with-source (source)
504 (find-namespace-binding prefix)))
506 (defmethod klacks:decode-qname (qname (source cxml-source))
507 (with-source (source)
508 (multiple-value-bind (prefix local-name) (split-qname qname)
509 (values (and prefix (find-namespace-binding prefix))
510 local-name
511 prefix))))
514 ;;;; debugging
516 #+(or)
517 (trace CXML::KLACKS/DOCTYPE
518 CXML::KLACKS/EXT-PARSED-ENT
519 CXML::KLACKS/MISC*-2
520 CXML::KLACKS/ENTITY-REFERENCE
521 CXML::KLACKS/ENTITY-REFERENCE-2
522 CXML::KLACKS/ELEMENT
523 CXML::KLACKS/ZTAG
524 CXML::KLACKS/XMLDECL
525 CXML::KLACKS/FINISH-DOCTYPE
526 CXML::KLACKS/ELEMENT-3
527 CXML::KLACKS/EOF
528 CXML::KLACKS/ELEMENT-2
529 CXML::KLACKS/CONTENT )