1 ;;; -*- show-trailing-whitespace: t; indent-tabs-mode: nil -*-
3 ;;; Copyright (c) 2007,2008 David Lichteblau, Ivan Shvedunov.
4 ;;; All rights reserved.
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 ;;; This file implements whitespace stripping.
33 ;;; Although the spec presents a unified algorithm for whitespace stripping
34 ;;; of stylesheets and source documents, we implement them separately.
36 ;;; For stylesheets, the STP parse tree of the stylesheet is modified
37 ;;; directly according the its xml:space declarations and xsl:text elements.
39 ;;; For source documents, the strip-space and preserve-space declarations
40 ;;; from the stylesheet are taken into account. To avoid processing
41 ;;; parts of the document that XPath would not otherwise have navigated
42 ;;; to, we do whitespace stripping lazily using a proxy implementation
43 ;;; of the XPath protocol.
45 (in-package :xuriella
)
48 (declaim (optimize (debug 2)))
53 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
54 (defparameter *whitespace
*
55 (format nil
"~C~C~C~C"
61 (defun normalize-whitespace (str)
62 (cl-ppcre:regex-replace-all
#.
(format nil
"[~A]+" *whitespace
*)
63 (string-trim *whitespace
* str
)
66 (defun whitespacep (str)
67 (cl-ppcre:all-matches
#.
(format nil
"^[~A]+$" *whitespace
*) str
))
71 (cl-ppcre:split
#.
(format nil
"[~A]+" *whitespace
*)
72 (string-trim *whitespace
* str
))))
75 ;;;; Strip whitespace in stylesheets
77 ;; Also strips comments and PIs.
78 (defun strip-stylesheet (parent &optional preserve
)
80 (loop while
(< i
(length (cxml-stp-impl::%children parent
))) do
81 (let ((child (stp:nth-child i parent
)))
84 (if (and (whitespacep (stp:data child
))
86 (stp:delete-nth-child i parent
)
88 ((or stp
:comment stp
:processing-instruction
)
89 (stp:delete-nth-child i parent
))
91 (stp:with-attributes
((space "space" *xml
*))
95 ((namep child
"text") t
)
96 ((not space
) preserve
)
97 ((equal space
"preserve") t
)
99 (strip-stylesheet child new-preserve
)))
103 ;;;; Strip whitespace in source documents
105 (defun make-whitespace-stripper (node tests
)
107 (make-stripping-node nil node tests nil
)
110 (defstruct (stripping-node (:constructor
#:ignore
))
115 (defstruct (leaf-stripping-node
116 (:constructor make-leaf-stripping-node
(parent target
))
117 (:include stripping-node
)))
119 (defstruct (parent-stripping-node
120 (:constructor make-parent-stripping-node
(parent target
))
121 (:include stripping-node
)))
123 (defmethod print-object ((object stripping-node
) stream
)
124 (print-unreadable-object (object stream
:type t
:identity nil
)
125 (let ((target (write-to-string (stripping-node-target object
))))
126 (if (and (alexandria:starts-with-subseq target
"#<")
127 (alexandria:ends-with
#\
> target
))
128 (write-sequence target stream
:start
3 :end
(1- (length target
)))
129 (write-sequence target stream
)))))
131 (defun strip-under-qname-p (node tests
)
132 (let ((local-name (xpath-protocol:local-name node
))
133 (uri (xpath-protocol:namespace-uri node
)))
134 (dolist (test tests nil
)
135 (let ((result (funcall test local-name uri
)))
137 (return (eq result
:strip
)))))))
139 (defun xpath-protocol/attribute-value
(node local-name uri
)
140 (do-pipe (a (xpath-protocol:attribute-pipe node
))
141 (when (and (equal (xpath-protocol:local-name a
) local-name
)
142 (equal (xpath-protocol:namespace-uri a
) uri
))
143 (return (xpath-protocol:node-text a
)))))
145 (defun make-stripping-node (parent target tests force-preserve
)
146 (let ((result (make-parent-stripping-node parent target
))
147 (xml-space (xpath-protocol/attribute-value target
"space" *xml
*)))
149 (setf force-preserve
(equal xml-space
"preserve")))
150 (labels ((recurse (child-node)
151 (if (xpath-protocol:node-type-p child-node
:element
)
152 (make-stripping-node result child-node tests force-preserve
)
153 (make-leaf-stripping-node result child-node
)))
154 (maybe-recurse (child-node)
155 (if (and (xpath-protocol:node-type-p child-node
:text
)
156 (whitespacep (xpath-protocol:node-text child-node
)))
158 (recurse child-node
))))
159 (let ((all-children (xpath-protocol:child-pipe target
)))
160 (setf (stripping-node-children result
)
161 (if (or force-preserve
162 (not (xpath-protocol:node-type-p target
:element
))
163 (not (strip-under-qname-p target tests
)))
164 (xpath::map-pipe-filtering
#'recurse all-children
)
165 (xpath::map-pipe-filtering
#'maybe-recurse all-children
)))))
168 (macrolet ((defproxy (name &rest args
)
169 `(define-default-method ,name
((node stripping-node
) ,@args
)
170 (,name
(stripping-node-target node
) ,@args
))))
171 (defproxy xpath-protocol
:local-name
)
172 (defproxy xpath-protocol
:namespace-uri
)
173 (defproxy xpath-protocol
:namespace-prefix
)
174 (defproxy xpath-protocol
:qualified-name
)
175 (defproxy xpath-protocol
:attribute-pipe
)
176 (defproxy xpath-protocol
:namespace-pipe
)
177 (defproxy xpath-protocol
:node-type-p type
))
179 (define-default-method xpath-protocol
:node-p
((node stripping-node
))
182 (define-default-method xpath-protocol
:child-pipe
((node stripping-node
))
183 (stripping-node-children node
))
185 (define-default-method xpath-protocol
:parent-node
((node stripping-node
))
186 (stripping-node-parent node
))
188 (define-default-method xpath-protocol
:node-text
((node stripping-node
))
189 (with-output-to-string (s)
190 (write-string-value node s
)))
192 (defmethod write-string-value ((node parent-stripping-node
) stream
)
193 (do-pipe (child (xpath-protocol:child-pipe node
))
194 (unless (or (xpath-protocol:node-type-p child
:comment
)
195 (xpath-protocol:node-type-p child
:processing-instruction
))
196 (write-string-value child stream
))))
198 (defmethod write-string-value ((node leaf-stripping-node
) stream
)
199 (write-string-value (stripping-node-target node
) stream
))
201 (defmethod write-string-value (node stream
)
202 (write-string (xpath-protocol:node-text node
) stream
))
204 (define-default-method xpath-protocol
:get-element-by-id
205 ((node stripping-node
) id
)
207 (xpath-protocol:get-element-by-id
(stripping-node-target node
) id
)))
209 (let ((stripping-root
211 for parent
= node then next
212 for next
= (stripping-node-parent parent
)
214 finally
(return parent
)))
217 for parent
= target then next
218 for next
= (xpath-protocol:parent-node parent
)
220 do
(push parent target-path
))
221 (labels ((find-child (stripping-parent target-child
)
222 (xpath::find-in-pipe target-child
223 (xpath-protocol:child-pipe
225 :key
#'stripping-node-target
))
226 (resolve-path (stripping-parent target-path
)
229 (find-child stripping-parent
(car target-path
))))
231 (resolve-path step
(cdr target-path
))
234 (resolve-path stripping-root target-path
))))))
236 (define-default-method xpath-protocol
:unparsed-entity-uri
237 ((node stripping-node
) name
)
238 (xpath-protocol:unparsed-entity-uri
(stripping-node-target node
) name
))
241 ;;;; TEXT NORMALIZER, from cxml-rng
243 ;;; FIXME: cxml should do that
245 (defun make-text-normalizer (next)
246 (make-instance 'text-normalizer
:chained-handler next
))
248 (defclass text-normalizer
(cxml:sax-proxy
)
249 ((pending-text-node :initform
(make-string-output-stream)
250 :accessor pending-text-node
)))
252 (defmethod sax:characters
((handler text-normalizer
) data
)
253 (write-string data
(pending-text-node handler
)))
255 (defun flush-pending (handler)
256 (let ((str (get-output-stream-string (pending-text-node handler
))))
257 (unless (zerop (length str
))
258 (sax:characters
(cxml:proxy-chained-handler handler
) str
))))
260 (defmethod sax:start-element
:before
261 ((handler text-normalizer
) uri lname qname attributes
)
262 (declare (ignore uri lname qname attributes
))
263 (flush-pending handler
))
265 (defmethod sax:end-element
:before
266 ((handler text-normalizer
) uri lname qname
)
267 (declare (ignore uri lname qname
))
268 (flush-pending handler
))