1 ;;; -*- Mode: Lisp; readtable: runes; -*-
2 ;;; (c) copyright 2007 David Lichteblau
4 ;;; This library is free software; you can redistribute it and/or
5 ;;; modify it under the terms of the GNU Library General Public
6 ;;; License as published by the Free Software Foundation; either
7 ;;; version 2 of the License, or (at your option) any later version.
9 ;;; This library is distributed in the hope that it will be useful,
10 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ;;; Library General Public License for more details.
14 ;;; You should have received a copy of the GNU Library General Public
15 ;;; License along with this library; if not, write to the
16 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ;;; Boston, MA 02111-1307 USA.
21 (defclass cxml-source
(klacks:source
)
22 (;; args to make-source
23 (context :initarg
:context
)
24 (validate :initarg
:validate
)
27 (error-culprit :initarg
:error-culprit
)
30 (current-key :initform nil
)
33 (cdata-section-p :reader klacks
:current-cdata-section-p
)
34 ;; extra WITH-SOURCE magic
35 (data-behaviour :initform
:DTD
)
36 (namespace-stack :initform
(list *initial-namespace-bindings
*))
37 (current-namespace-declarations)
38 (temporary-streams :initform nil
)
39 (scratch-pad :initarg
:scratch-pad
)
40 (scratch-pad-2 :initarg
:scratch-pad-2
)
41 (scratch-pad-3 :initarg
:scratch-pad-3
)
42 (scratch-pad-4 :initarg
:scratch-pad-4
)))
44 (defmethod klacks:close-source
((source cxml-source
))
45 (dolist (xstream (slot-value source
'temporary-streams
))
46 ;; fixme: error handling?
47 (close-xstream xstream
)))
49 (defmacro with-source
((source &rest slots
) &body body
)
52 (*ctx
* (slot-value ,s
'context
))
53 (*validate
* (slot-value ,s
'validate
))
54 (*data-behaviour
* (slot-value source
'data-behaviour
))
55 (*namespace-bindings
* (car (slot-value source
'namespace-stack
)))
56 (*scratch-pad
* (slot-value source
'scratch-pad
))
57 (*scratch-pad-2
* (slot-value source
'scratch-pad-2
))
58 (*scratch-pad-3
* (slot-value source
'scratch-pad-3
))
59 (*scratch-pad-4
* (slot-value source
'scratch-pad-4
)))
61 (with-slots (,@slots
) ,s
63 (runes-encoding:encoding-error
(c)
64 (wf-error (slot-value ,s
'error-culprit
) "~A" c
))))))
66 (defun fill-source (source)
67 (with-slots (current-key current-values continuation
) source
69 (setf current-key
:bogus
)
70 (setf continuation
(funcall continuation
))
71 (assert (not (eq current-key
:bogus
))))))
73 (defmethod klacks:peek
((source cxml-source
))
74 (with-source (source current-key current-values
)
76 (apply #'values current-key current-values
)))
78 (defmethod klacks:peek-value
((source cxml-source
))
79 (with-source (source current-key current-values
)
81 (apply #'values current-values
)))
83 (defmethod klacks:peek-next
((source cxml-source
))
84 (with-source (source current-key current-values
)
85 (setf current-key nil
)
87 (apply #'values current-key current-values
)))
89 (defmethod klacks:consume
((source cxml-source
))
90 (with-source (source current-key current-values
)
93 (apply #'values current-key current-values
)
94 (setf current-key nil
))))
96 (defmethod klacks:map-attributes
(fn (source cxml-source
))
97 (dolist (a (slot-value source
'current-attributes
))
99 (sax:attribute-namespace-uri a
)
100 (sax:attribute-local-name a
)
101 (sax:attribute-qname a
)
102 (sax:attribute-value a
)
103 (sax:attribute-specified-p a
))))
105 (defmethod klacks:get-attribute
106 ((source cxml-source
) lname
&optional uri
)
107 (dolist (a (slot-value source
'current-attributes
))
108 (when (and (equal (sax:attribute-local-name a
) lname
)
109 (equal (sax:attribute-namespace-uri a
) uri
))
110 (return (sax:attribute-value a
)))))
112 (defmethod klacks:list-attributes
((source cxml-source
))
113 (slot-value source
'current-attributes
))
117 &key validate dtd root entity-resolver disallow-internal-subset
118 (buffering t
) pathname
)
119 (declare (ignore validate dtd root entity-resolver disallow-internal-subset
))
122 (when (and (not buffering
) (< 1 (runes::xstream-speed input
)))
123 (warn "make-source called with !buffering, but xstream is buffering"))
125 (let ((zstream (make-zstream :input-stack
(list input
))))
127 (with-scratch-pads ()
128 (apply #'%make-source
131 for
(name value
) on args by
#'cddr
132 unless
(member name
'(:pathname
:buffering
))
133 append
(list name value
)))))))
135 (let ((xstream (make-xstream input
:speed
(if buffering
8192 1))))
136 (setf (xstream-name xstream
)
138 :entity-name
"main document"
140 :uri
(pathname-to-uri
141 (merge-pathnames (or pathname
(pathname input
))))))
142 (apply #'make-source xstream args
)))
145 (make-xstream (open input
:element-type
'(unsigned-byte 8))
146 :speed
(if buffering
8192 1))))
147 (setf (xstream-name xstream
)
149 :entity-name
"main document"
151 :uri
(pathname-to-uri (merge-pathnames input
))))
152 (let ((source (apply #'make-source
156 (push xstream
(slot-value source
'temporary-streams
))
159 (let ((xstream (string->xstream input
)))
160 (setf (xstream-name xstream
)
162 :entity-name
"main document"
165 (apply #'make-source xstream args
)))
167 (make-source (cxml::make-octet-input-stream input
)))))
170 (input &key validate dtd root entity-resolver disallow-internal-subset
172 ;; check types of user-supplied arguments for better error messages:
173 (check-type validate boolean
)
174 (check-type dtd
(or null extid
))
175 (check-type root
(or null rod
))
176 (check-type entity-resolver
(or null function symbol
))
177 (check-type disallow-internal-subset boolean
)
178 (let* ((xstream (car (zstream-input-stack input
)))
179 (name (xstream-name xstream
))
180 (base (when name
(stream-name-uri name
)))
182 (make-context :main-zstream input
183 :entity-resolver entity-resolver
184 :base-stack
(list (or base
""))
185 :disallow-internal-subset disallow-internal-subset
))
187 (make-instance 'cxml-source
192 :error-culprit error-culprit
193 :scratch-pad
*scratch-pad
*
194 :scratch-pad-2
*scratch-pad-2
*
195 :scratch-pad-3
*scratch-pad-3
*
196 :scratch-pad-4
*scratch-pad-4
*)))
197 (setf (handler context
) (make-instance 'klacks-dtd-handler
:source source
))
198 (setf (slot-value source
'continuation
)
199 (lambda () (klacks/xmldecl source input
)))
202 (defun klacks/xmldecl
(source input
)
203 (with-source (source current-key current-values
)
204 (let ((hd (p/xmldecl input
)))
205 (setf current-key
:start-document
)
208 (list (xml-header-version hd
)
209 (xml-header-encoding hd
)
210 (xml-header-standalone-p hd
))))
212 (klacks/misc
*-
2 source input
214 (klacks/doctype source input
)))))))
216 (defun klacks/misc
*-
2 (source input successor
)
217 (with-source (source current-key current-values
)
218 (multiple-value-bind (cat sem
) (peek-token input
)
221 (setf current-key
:comment
)
222 (setf current-values
(list sem
))
223 (consume-token input
)
224 (lambda () (klacks/misc
*-
2 source input successor
)))
226 (setf current-key
:processing-instruction
)
227 (setf current-values
(list (car sem
) (cdr sem
)))
228 (consume-token input
)
229 (lambda () (klacks/misc
*-
2 source input successor
)))
231 (consume-token input
)
232 (klacks/misc
*-
2 source input successor
))
234 (funcall successor
))))))
236 (defun klacks/doctype
(source input
)
237 (with-source (source current-key current-values validate dtd
)
238 (let ((cont (lambda () (klacks/finish-doctype source input
)))
242 ((eq (peek-token input
) :<!DOCTYPE
)
243 (setf l
(cdr (p/doctype-decl input dtd
)))
244 (lambda () (klacks/misc
*-
2 source input cont
)))
246 (setf l
(cdr (synthesize-doctype dtd input
)))
248 ((and validate
(not dtd
))
249 (validity-error "invalid document: no doctype"))
251 (return-from klacks
/doctype
253 (destructuring-bind (&optional name extid
) l
254 (setf current-key
:dtd
)
257 (and extid
(extid-public extid
))
258 (and extid
(extid-system extid
)))))))))
260 (defun klacks/finish-doctype
(source input
)
261 (with-source (source current-key current-values root data-behaviour
)
264 (setf (model-stack *ctx
*) (list (make-root-model root
))))
265 (setf data-behaviour
:DOC
)
266 (setf *data-behaviour
* :DOC
)
270 (klacks/eof source input
)))
273 (setf data-behaviour
:DTD
)
274 (setf *data-behaviour
* :DTD
)
275 (klacks/misc
*-
2 source input final
))))
276 (klacks/element source input next
))))
278 (defun klacks/eof
(source input
)
279 (with-source (source current-key current-values
)
281 (klacks:close-source source
)
282 (setf current-key
:end-document
)
283 (setf current-values nil
)
284 (lambda () (klacks/nil source
))))
286 (defun klacks/nil
(source)
287 (with-source (source current-key current-values
)
288 (setf current-key nil
)
289 (setf current-values nil
)
290 (labels ((klacks/done
()
291 (setf current-key nil
)
292 (setf current-values nil
)
296 (defun klacks/element
(source input cont
)
297 (with-source (source current-key current-values current-attributes
298 current-namespace-declarations
)
299 (multiple-value-bind (cat n-b new-b uri lname qname attrs
) (p/sztag input
)
300 (setf current-key
:start-element
)
301 (setf current-values
(list uri lname qname
))
302 (setf current-attributes attrs
)
303 (setf current-namespace-declarations new-b
)
306 (klacks/element-2 source input n-b cont
))
308 (klacks/ztag source cont
))))))
310 (defun klacks/ztag
(source cont
)
311 (with-source (source current-key current-values current-attributes
)
312 (setf current-key
:end-element
)
313 (setf current-attributes nil
)
314 (validate-end-element *ctx
* (third current-values
))
317 (defun klacks/element-2
(source input n-b cont
)
319 current-key current-values current-attributes namespace-stack
320 current-namespace-declarations
)
321 (let ((values* current-values
)
322 (new-b current-namespace-declarations
))
323 (setf current-attributes nil
)
324 (push n-b namespace-stack
)
327 (setf current-namespace-declarations new-b
)
328 (klacks/element-3 source input values
* cont
))))
329 (klacks/content source input finish
)))))
331 (defun klacks/element-3
(source input tag-values cont
)
332 (with-source (source current-key current-values current-attributes
)
333 (setf current-key
:end-element
)
334 (setf current-values tag-values
)
335 (let ((qname (third tag-values
)))
337 (validate-end-element *ctx
* qname
))
340 (defun klacks/content
(source input cont
)
341 (with-source (source current-key current-values cdata-section-p
)
342 (let ((recurse (lambda () (klacks/content source input cont
))))
343 (multiple-value-bind (cat sem
) (peek-token input
)
346 (klacks/element source input recurse
))
348 (process-characters input sem
)
349 (setf current-key
:characters
)
350 (setf current-values
(list sem
))
351 (setf cdata-section-p nil
)
355 (consume-token input
)
356 (klacks/entity-reference source input name recurse
)))
358 (setf current-key
:characters
)
359 (setf current-values
(list (process-cdata-section input
)))
360 (setf cdata-section-p t
)
363 (setf current-key
:processing-instruction
)
364 (setf current-values
(list (car sem
) (cdr sem
)))
365 (consume-token input
)
368 (setf current-key
:comment
)
369 (setf current-values
(list sem
))
370 (consume-token input
)
375 (defun klacks/entity-reference
(source zstream name cont
)
376 (assert (not (zstream-token-category zstream
)))
377 (with-source (source temporary-streams context
)
378 (let ((new-xstream (entity->xstream zstream name
:general nil
)))
379 (push new-xstream temporary-streams
)
380 (push :stop
(zstream-input-stack zstream
))
381 (zstream-push new-xstream zstream
)
382 (push (stream-name-uri (xstream-name new-xstream
)) (base-stack context
))
385 (klacks/entity-reference-2 source zstream new-xstream cont
))))
386 (etypecase (checked-get-entdef name
:general
)
388 (klacks/content source zstream next
))
390 (klacks/ext-parsed-ent source zstream next
)))))))
392 (defun klacks/entity-reference-2
(source zstream new-xstream cont
)
393 (with-source (source temporary-streams context
)
394 (unless (eq (peek-token zstream
) :eof
)
395 (wf-error zstream
"Trailing garbage. - ~S" (peek-token zstream
)))
396 (assert (eq (peek-token zstream
) :eof
))
397 (assert (eq (pop (zstream-input-stack zstream
)) new-xstream
))
398 (assert (eq (pop (zstream-input-stack zstream
)) :stop
))
399 (pop (base-stack context
))
400 (setf (zstream-token-category zstream
) nil
)
401 (setf temporary-streams
(remove new-xstream temporary-streams
))
402 (close-xstream new-xstream
)
405 (defun klacks/ext-parsed-ent
(source input cont
)
406 (with-source (source)
407 (when (eq (peek-token input
) :xml-decl
)
408 (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input
))))))
409 (setup-encoding input hd
))
410 (consume-token input
))
411 (set-full-speed input
)
412 (klacks/content source input cont
)))
415 ;;;; terrible kludges
417 (defclass klacks-dtd-handler
()
418 ((handler-source :initarg
:source
:reader handler-source
)
419 (internal-subset-p :initform nil
:accessor handler-internal-subset-p
)))
421 (defmethod sax:start-internal-subset
((handler klacks-dtd-handler
))
422 (setf (slot-value (handler-source handler
) 'internal-declarations
) '())
423 (setf (handler-internal-subset-p handler
) t
))
425 (defmethod sax:end-internal-subset
((handler klacks-dtd-handler
))
426 (setf (handler-internal-subset-p handler
) nil
))
428 (defmethod sax:entity-resolver
((handler klacks-dtd-handler
) fn
)
429 (setf (slot-value (handler-source handler
) 'dom-impl-entity-resolver
) fn
))
431 (defmethod sax::dtd
((handler klacks-dtd-handler
) dtd
)
432 (setf (slot-value (handler-source handler
) 'dom-impl-dtd
) dtd
))
434 (defmethod sax:end-dtd
((handler klacks-dtd-handler
))
435 (let ((source (handler-source handler
)))
436 (when (slot-boundp source
'internal-declarations
)
437 (setf (slot-value source
'internal-declarations
)
438 (reverse (slot-value source
'internal-declarations
)))
439 (setf (slot-value source
'external-declarations
)
440 (reverse (slot-value source
'external-declarations
))))))
443 ((defhandler (name &rest args
)
444 `(defmethod ,name
((handler klacks-dtd-handler
) ,@args
)
445 (let ((source (handler-source handler
))
446 (spec (list ',name
,@args
)))
447 (if (handler-internal-subset-p handler
)
448 (push spec
(slot-value source
'internal-declarations
))
449 (push spec
(slot-value source
'external-declarations
)))))))
450 (defhandler sax
:unparsed-entity-declaration
451 name public-id system-id notation-name
)
452 (defhandler sax
:external-entity-declaration
453 kind name public-id system-id
)
454 (defhandler sax
:internal-entity-declaration
456 (defhandler sax
:notation-declaration
457 name public-id system-id
)
458 (defhandler sax
:element-declaration
460 (defhandler sax
:attribute-declaration
461 element-name attribute-name type default
))
466 (defun source-xstream (source)
467 (car (zstream-input-stack (main-zstream (slot-value source
'context
)))))
469 (defun source-stream-name (source)
470 (let ((xstream (source-xstream source
)))
472 (xstream-name xstream
)
475 (defmethod klacks:current-line-number
((source cxml-source
))
476 (let ((x (source-xstream source
)))
478 (xstream-line-number x
)
481 (defmethod klacks:current-column-number
((source cxml-source
))
482 (let ((x (source-xstream source
)))
484 (xstream-column-number x
)
487 (defmethod klacks:current-system-id
((source cxml-source
))
488 (let ((name (source-stream-name source
)))
490 (stream-name-uri name
)
493 (defmethod klacks:current-xml-base
((source cxml-source
))
494 (car (base-stack (slot-value source
'context
))))
496 (defmethod klacks:map-current-namespace-declarations
(fn (source cxml-source
))
498 for
(prefix . uri
) in
(slot-value source
'current-namespace-declarations
)
500 (funcall fn prefix uri
)))
502 (defmethod klacks:find-namespace-binding
(prefix (source cxml-source
))
503 (with-source (source)
504 (find-namespace-binding prefix
)))
506 (defmethod klacks:decode-qname
(qname (source cxml-source
))
507 (with-source (source)
508 (multiple-value-bind (prefix local-name
) (split-qname qname
)
509 (values (and prefix
(find-namespace-binding prefix
))
517 (trace CXML
::KLACKS
/DOCTYPE
518 CXML
::KLACKS
/EXT-PARSED-ENT
520 CXML
::KLACKS
/ENTITY-REFERENCE
521 CXML
::KLACKS
/ENTITY-REFERENCE-2
525 CXML
::KLACKS
/FINISH-DOCTYPE
526 CXML
::KLACKS
/ELEMENT-3
528 CXML
::KLACKS
/ELEMENT-2
529 CXML
::KLACKS
/CONTENT
)