Implemented exclude-result-prefixes.
[xuriella.git] / space.lisp
blob58dd8af1fc096d5d4ccde7de9a6baacd7bc6cea0
1 ;;; -*- show-trailing-whitespace: t; indent-tabs: nil -*-
3 ;;; Copyright (c) 2007 David Lichteblau. All rights reserved.
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8 ;;;
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11 ;;;
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16 ;;;
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 ;;; This file implements whitespace stripping.
31 ;;;
32 ;;; Although the spec presents a unified algorithm for whitespace stripping
33 ;;; of stylesheets and source documents, we implement them separately.
34 ;;;
35 ;;; For stylesheets, the STP parse tree of the stylesheet is modified
36 ;;; directly according the its xml:space declarations and xsl:text elements.
37 ;;;
38 ;;; For source documents, the strip-space and preserve-space declarations
39 ;;; from the stylesheet are taken into account. To avoid processing
40 ;;; parts of the document that XPath would not otherwise have navigated
41 ;;; to, we do whitespace stripping lazily using a proxy implementation
42 ;;; of the XPath protocol.
44 (in-package :xuriella)
46 #+sbcl
47 (declaim (optimize (debug 2)))
50 ;;;; Helper functions
52 (eval-when (:compile-toplevel :load-toplevel :execute)
53 (defparameter *whitespace*
54 (format nil "~C~C~C~C"
55 (code-char 9)
56 (code-char 32)
57 (code-char 13)
58 (code-char 10))))
60 (defun normalize-whitespace (str)
61 (cl-ppcre:regex-replace-all #.(format nil "[~A]+" *whitespace*)
62 (string-trim *whitespace* str)
63 " "))
65 (defun whitespacep (str)
66 (cl-ppcre:all-matches #.(format nil "^[~A]+$" *whitespace*) str))
68 (defun words (str)
69 (cl-ppcre:split #.(format nil "[~A]+" *whitespace*)
70 (string-trim *whitespace* str)))
73 ;;;; Strip whitespace in stylesheets
75 ;; Also strips comments and PIs.
76 (defun strip-stylesheet (parent &optional preserve)
77 (let ((i 0))
78 (loop while (< i (length (cxml-stp-impl::%children parent))) do
79 (let ((child (stp:nth-child i parent)))
80 (etypecase child
81 (stp:text
82 (if (and (whitespacep (stp:data child))
83 (not preserve))
84 (stp:delete-nth-child i parent)
85 (incf i)))
86 ((or stp:comment stp:processing-instruction)
87 (stp:delete-nth-child i parent))
88 (stp:element
89 (stp:with-attributes ((space "space" *xml*))
90 child
91 (let ((new-preserve
92 (cond
93 ((namep child "text") t)
94 ((not space) preserve)
95 ((equal space "preserve") t)
96 (t nil))))
97 (strip-stylesheet child new-preserve)))
98 (incf i)))))))
101 ;;;; Strip whitespace in source documents
103 (defun make-whitespace-stripper (node tests)
104 (make-stripping-node nil node tests nil))
106 (defstruct (stripping-node
107 (:constructor make-stripping-node/low (parent target)))
108 parent
109 target
110 children)
112 (defmethod print-object ((object stripping-node) stream)
113 (print-unreadable-object (object stream :type t :identity nil)
114 (let ((target (write-to-string (stripping-node-target object))))
115 (if (and (alexandria:starts-with-subseq target "#<")
116 (alexandria:ends-with #\> target))
117 (write-sequence target stream :start 3 :end (1- (length target)))
118 (write-sequence target stream)))))
120 (defun strip-under-qname-p (node tests)
121 (let ((local-name (xpath-protocol:local-name node))
122 (uri (xpath-protocol:namespace-uri node)))
123 (dolist (test tests nil)
124 (let ((result (funcall test local-name uri)))
125 (when result
126 (return (eq result :strip)))))))
128 (defun xpath-protocol/attribute-value (node local-name uri)
129 (do-pipe (a (xpath-protocol:attribute-pipe node))
130 (when (and (equal (xpath-protocol:local-name a) local-name)
131 (equal (xpath-protocol:namespace-uri a) uri))
132 (return (xpath-protocol:string-value a)))))
134 (defun make-stripping-node (parent target tests force-preserve)
135 (let ((result (make-stripping-node/low parent target))
136 (xml-space (xpath-protocol/attribute-value target "space" *xml*)))
137 (when xml-space
138 (setf force-preserve (equal xml-space "preserve")))
139 (labels ((recurse (child-node)
140 (if (xpath-protocol:node-type-p child-node :element)
141 (make-stripping-node result child-node tests force-preserve)
142 child-node))
143 (maybe-recurse (child-node)
144 (if (and (xpath-protocol:node-type-p child-node :text)
145 (whitespacep (xpath-protocol:string-value child-node)))
147 (recurse child-node))))
148 (let ((all-children (xpath-protocol:child-pipe target)))
149 (setf (stripping-node-children result)
150 (if (or force-preserve
151 (not (xpath-protocol:node-type-p target :element))
152 (not (strip-under-qname-p target tests)))
153 (xpath::map-pipe-filtering #'recurse all-children)
154 (xpath::map-pipe-filtering #'maybe-recurse all-children)))))
155 result))
157 (macrolet ((defproxy (name &rest args)
158 `(defmethod ,name ((node stripping-node) ,@args)
159 (,name (stripping-node-target node) ,@args))))
160 (defproxy xpath-protocol:local-name)
161 (defproxy xpath-protocol:namespace-uri)
162 (defproxy xpath-protocol:namespace-prefix)
163 (defproxy xpath-protocol:qualified-name)
164 (defproxy xpath-protocol:attribute-pipe)
165 (defproxy xpath-protocol:namespace-pipe)
166 (defproxy xpath-protocol:node-type-p type))
168 (defmethod xpath-protocol:child-pipe ((node stripping-node))
169 (stripping-node-children node))
171 (defmethod xpath-protocol:parent-node ((node stripping-node))
172 (stripping-node-parent node))
174 (defmethod xpath-protocol:string-value ((node stripping-node))
175 (with-output-to-string (s)
176 (write-string-value node s)))
178 (defmethod write-string-value ((node stripping-node) stream)
179 (do-pipe (child (xpath-protocol:child-pipe node))
180 (unless (or (xpath-protocol:node-type-p child :comment)
181 (xpath-protocol:node-type-p child :processing-instruction))
182 (write-string-value child stream))))
184 (defmethod write-string-value (node stream)
185 (write-string (xpath-protocol:string-value node) stream))