various docstrings; release
[cxml.git] / xml / catalog.lisp
blob1828914cbbf94aa59d1e5f3b5157606a7308e978
1 ;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*-
2 ;;;;
3 ;;;; This file is part of the CXML parser, released under Lisp-LGPL.
4 ;;;; See file COPYING for details.
5 ;;;;
6 ;;;; Developed 2004 for headcraft - http://headcraft.de/
7 ;;;; Copyright: David Lichteblau
9 (in-package :cxml)
11 ;;; http://www.oasis-open.org/committees/entity/spec.html
12 ;;;
13 ;;; Bugs:
14 ;;; - We validate using the Catalog DTD while parsing, which is too strict
15 ;;; and will will fail to parse files using other parser's extensions.
16 ;;; (Jedenfalls behauptet das die Spec.)
17 ;;; A long-term solution might be an XML Schema validator.
19 (defvar *prefer* :public)
20 (defvar *default-catalog*
21 '(;; libxml standard
22 "/etc/xml/catalog"
23 ;; FreeBSD
24 "/usr/local/share/xml/catalog.ports"))
26 (defstruct (catalog (:constructor %make-catalog ()))
27 main-files
28 (dtd-cache (make-dtd-cache))
29 (file-table (puri:make-uri-space)))
31 (defstruct (entry-file (:conc-name ""))
32 (system-entries) ;extid 2
33 (rewrite-system-entries) ; 3
34 (delegate-system-entries) ; 4
35 (public-entries) ; 5
36 (delegate-public-entries) ; 6
37 (uri-entries) ;uri 2
38 (rewrite-uri-entries) ; 3
39 (delegate-uri-entries) ; 4
40 (next-catalog-entries) ; 5/7
43 (defun starts-with-p (string prefix)
44 (let ((mismatch (mismatch string prefix)))
45 (or (null mismatch) (= mismatch (length prefix)))))
47 (defun normalize-public (str)
48 (setf str (rod-to-utf8-string (rod str)))
49 (flet ((whitespacep (c)
50 (find c #.(map 'string #'code-char '(#x9 #xa #xd #x20)))))
51 (let ((start (position-if-not #'whitespacep str))
52 (end (position-if-not #'whitespacep str :from-end t))
53 (spacep nil))
54 (with-output-to-string (out)
55 (when start
56 (loop for i from start to end do
57 (let ((c (char str i)))
58 (cond
59 ((whitespacep c)
60 (unless spacep
61 (setf spacep t)
62 (write-char #\space out)))
64 (setf spacep nil)
65 (write-char c out))))))))))
67 (defun normalize-uri (str)
68 (when (typep str 'puri:uri)
69 (setf str (puri:render-uri str nil)))
70 (setf str (rod-to-utf8-string (rod str)))
71 (with-output-to-string (out)
72 (loop for ch across str do
73 (let ((c (char-code ch)))
74 (if (< c 15)
75 (write-string (string-upcase (format nil "%~2,'0X" c)) out)
76 (write-char ch out))))))
78 (defun unwrap-publicid (str)
79 (normalize-public
80 (with-output-to-string (out)
81 (let ((i (length "urn:publicid:"))
82 (n (length str)))
83 (while (< i n)
84 (let ((c (char str i)))
85 (case c
86 (#\+ (write-char #\space out))
87 (#\: (write-string "//" out))
88 (#\; (write-string "::" out))
89 (#\%
90 (let ((code
91 (parse-integer str
92 :start (+ i 1)
93 :end (+ i 3)
94 :radix 16)))
95 (write-char (code-char code) out))
96 (incf i 2))
97 (t (write-char c out))))
98 (incf i))))))
100 (defun match-exact (key table &optional check-prefer)
101 (dolist (pair table)
102 (destructuring-bind (from to &optional prefer) pair
103 (when (and (equal key from) (or (not check-prefer) (eq prefer :public)))
104 (return to)))))
106 (defun match-prefix/rewrite (key table &optional check-prefer)
107 (let ((match nil)
108 (match-length -1))
109 (dolist (pair table)
110 (destructuring-bind (from to &optional prefer) pair
111 (when (and (or (not check-prefer) (eq prefer :public))
112 (starts-with-p key from)
113 (> (length from) match-length))
114 (setf match-length (length from))
115 (setf match to))))
116 (if match
117 (concatenate 'string
118 match
119 (subseq key match-length))
120 nil)))
122 (defun match-prefix/sorted (key table &optional check-prefer)
123 (let ((result '()))
124 (dolist (pair table)
125 (destructuring-bind (from to &optional prefer) pair
126 (when (and (or (not check-prefer) (eq prefer :public))
127 (starts-with-p key from))
128 (push (cons (length from) to) result))))
129 (mapcar #'cdr (sort result #'> :key #'car))))
131 (defun resolve-extid (public system catalog)
132 (when public (setf public (normalize-public public)))
133 (when system (setf system (normalize-uri system)))
134 (when (and system (starts-with-p system "urn:publicid:"))
135 (let ((new-public (unwrap-publicid system)))
136 (assert (or (null public) (equal public new-public)))
137 (setf public new-public
138 system nil)))
139 (let ((files (catalog-main-files catalog))
140 (seen '()))
141 (while files
142 (let ((file (pop files))
143 (delegates nil))
144 (unless (typep file 'entry-file)
145 (setf file (find-catalog-file file catalog)))
146 (unless (or (null file) (member file seen))
147 (push file seen)
148 (when system
149 (let ((result
150 (or (match-exact system (system-entries file))
151 (match-prefix/rewrite
152 system
153 (rewrite-system-entries file)))))
154 (when result
155 (return result))
156 (setf delegates
157 (match-prefix/sorted
158 system
159 (delegate-system-entries file)))))
160 (when (and public (not delegates))
161 (let* ((check-prefer (and system t))
162 (result
163 (match-exact public
164 (public-entries file)
165 check-prefer)))
166 (when result
167 (return result))
168 (setf delegates
169 (match-prefix/sorted
170 public
171 (delegate-public-entries file)
172 check-prefer))))
173 (if delegates
174 (setf files delegates)
175 (setf files (append (next-catalog-entries file) files))))))))
177 (defun resolve-uri (uri catalog)
178 (setf uri (normalize-uri uri))
179 (when (starts-with-p uri "urn:publicid:")
180 (return-from resolve-uri
181 (resolve-extid (unwrap-publicid uri) nil catalog)))
182 (let ((files (catalog-main-files catalog))
183 (seen '()))
184 (while files
185 (let ((file (pop files)))
186 (unless (typep file 'entry-file)
187 (setf file (find-catalog-file file catalog)))
188 (unless (or (null file) (member file seen))
189 (push file seen)
190 (let ((result
191 (or (match-exact uri (uri-entries file))
192 (match-prefix/rewrite uri (rewrite-uri-entries file)))))
193 (when result
194 (return result))
195 (let* ((delegate-entries
196 (delegate-uri-entries file))
197 (delegates
198 (match-prefix/sorted uri delegate-entries)))
199 (if delegates
200 (setf files delegates)
201 (setf files (append (next-catalog-entries file) files))))))))))
203 (defun find-catalog-file (uri catalog)
204 (setf uri (if (stringp uri) (safe-parse-uri uri) uri))
205 (let* ((*dtd-cache* (catalog-dtd-cache catalog))
206 (*cache-all-dtds* t)
207 (file (parse-catalog-file uri)))
208 (when file
209 (let ((interned (puri:intern-uri uri (catalog-file-table catalog))))
210 (setf (getf (puri:uri-plist interned) 'catalog) file)))
211 file))
213 (defun make-catalog (&optional (uris *default-catalog*))
214 (let ((result (%make-catalog)))
215 (setf (catalog-main-files result)
216 (loop
217 for uri in uris
218 for file = (find-catalog-file uri result)
219 when file collect file))
220 result))
222 (defun parse-catalog-file (uri)
223 (handler-case
224 (parse-catalog-file/strict uri)
225 ((or file-error xml-parse-error) (c)
226 (warn "ignoring catalog error: ~A" c))))
228 (defparameter *catalog-dtd*
229 (let* ((cxml
230 (slot-value (asdf:find-system :cxml) 'asdf::relative-pathname))
231 (dtd (merge-pathnames "catalog.dtd" cxml)))
232 (with-open-file (s dtd :element-type '(unsigned-byte 8))
233 (let ((bytes
234 (make-array (file-length s) :element-type '(unsigned-byte 8))))
235 (read-sequence bytes s)
236 bytes))))
238 (defun parse-catalog-file/strict (uri)
239 (let* ((*catalog* nil)
240 (dtd-sysid
241 (puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd")))
242 (flet ((entity-resolver (public system)
243 (declare (ignore public))
244 (if (puri:uri= system dtd-sysid)
245 (make-octet-input-stream *catalog-dtd*)
246 nil)))
247 (with-open-stream (s (open (uri-to-pathname uri)
248 :element-type '(unsigned-byte 8)
249 :direction :input))
250 (parse-stream s
251 (make-instance 'catalog-parser :uri uri)
252 :validate nil
253 :dtd (make-extid nil dtd-sysid)
254 :root #"catalog"
255 :entity-resolver #'entity-resolver)))))
257 (defclass catalog-parser ()
258 ((result :initform (make-entry-file) :accessor result)
259 (next :initform '() :accessor next)
260 (prefer-stack :initform (list *prefer*) :accessor prefer-stack)
261 (catalog-base-stack :accessor catalog-base-stack)))
263 (defmethod initialize-instance :after
264 ((instance catalog-parser) &key uri)
265 (setf (catalog-base-stack instance) (list uri)))
267 (defmethod prefer ((handler catalog-parser))
268 (car (prefer-stack handler)))
270 (defmethod base ((handler catalog-parser))
271 (car (catalog-base-stack handler)))
273 (defun get-attribute/lname (name attributes)
274 (let ((a (find name attributes
275 :key (lambda (a)
276 (or (sax:attribute-local-name a)
277 (sax:attribute-qname a)))
278 :test #'string=)))
279 (and a (sax:attribute-value a))))
281 (defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs)
282 (declare (ignore uri))
283 (setf lname (or lname qname))
284 ;; we can dispatch on lnames only because we validate against the DTD,
285 ;; which disallows other namespaces.
286 ;; FIXME: we don't, because we can't.
287 (push (let ((new (get-attribute/lname "prefer" attrs)))
288 (cond
289 ((equal new "public") :public)
290 ((equal new "system") :system)
291 ((null new) (prefer handler))))
292 (prefer-stack handler))
293 (push (string-or (get-attribute/lname "base" attrs) (base handler))
294 (catalog-base-stack handler))
295 (flet ((geturi (lname)
296 (puri:merge-uris
297 (safe-parse-uri (get-attribute/lname lname attrs))
298 (base handler))))
299 (cond
300 ((string= lname "public")
301 (push (list (normalize-public (get-attribute/lname "publicId" attrs))
302 (geturi "uri")
303 (prefer handler))
304 (public-entries (result handler))))
305 ((string= lname "system")
306 (push (list (normalize-uri (get-attribute/lname "systemId" attrs))
307 (geturi "uri"))
308 (system-entries (result handler))))
309 ((string= lname "uri")
310 (push (list (normalize-uri (get-attribute/lname "name" attrs))
311 (geturi "uri"))
312 (uri-entries (result handler))))
313 ((string= lname "rewriteSystem")
314 (push (list (normalize-uri
315 (get-attribute/lname "systemIdStartString" attrs))
316 (get-attribute/lname "rewritePrefix" attrs))
317 (rewrite-system-entries (result handler))))
318 ((string= lname "rewriteURI")
319 (push (list (normalize-uri
320 (get-attribute/lname "uriStartString" attrs))
321 (get-attribute/lname "rewritePrefix" attrs))
322 (rewrite-uri-entries (result handler))))
323 ((string= lname "delegatePublic")
324 (push (list (normalize-public
325 (get-attribute/lname "publicIdStartString" attrs))
326 (geturi "catalog")
327 (prefer handler))
328 (delegate-public-entries (result handler))))
329 ((string= lname "delegateSystem")
330 (push (list (normalize-uri
331 (get-attribute/lname "systemIdStartString" attrs))
332 (geturi "catalog"))
333 (delegate-system-entries (result handler))))
334 ((string= lname "delegateURI")
335 (push (list (normalize-uri
336 (get-attribute/lname "uriStartString" attrs))
337 (geturi "catalog"))
338 (delegate-uri-entries (result handler))))
339 ((string= lname "nextCatalog")
340 (push (geturi "catalog")
341 (next-catalog-entries (result handler)))))))
343 (defmethod sax:end-element ((handler catalog-parser) uri lname qname)
344 (declare (ignore uri lname qname))
345 (pop (catalog-base-stack handler))
346 (pop (prefer-stack handler)))
348 (defmethod sax:end-document ((handler catalog-parser))
349 (result handler))