1 ;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*-
3 ;;;; This file is part of the CXML parser, released under Lisp-LGPL.
4 ;;;; See file COPYING for details.
6 ;;;; Developed 2004 for headcraft - http://headcraft.de/
7 ;;;; Copyright: David Lichteblau
11 ;;; http://www.oasis-open.org/committees/entity/spec.html
14 ;;; - We validate using the Catalog DTD while parsing, which is too strict
15 ;;; and will will fail to parse files using other parser's extensions.
16 ;;; (Jedenfalls behauptet das die Spec.)
17 ;;; A long-term solution might be an XML Schema validator.
19 (defvar *prefer
* :public
)
20 (defvar *default-catalog
*
24 "/usr/local/share/xml/catalog.ports"))
26 (defstruct (catalog (:constructor %make-catalog
()))
28 (dtd-cache (make-dtd-cache))
29 (file-table (puri:make-uri-space
)))
31 (defstruct (entry-file (:conc-name
""))
32 (system-entries) ;extid 2
33 (rewrite-system-entries) ; 3
34 (delegate-system-entries) ; 4
36 (delegate-public-entries) ; 6
38 (rewrite-uri-entries) ; 3
39 (delegate-uri-entries) ; 4
40 (next-catalog-entries) ; 5/7
43 (defun starts-with-p (string prefix
)
44 (let ((mismatch (mismatch string prefix
)))
45 (or (null mismatch
) (= mismatch
(length prefix
)))))
47 (defun normalize-public (str)
48 (setf str
(rod-to-utf8-string (rod str
)))
49 (flet ((whitespacep (c)
50 (find c
#.
(map 'string
#'code-char
'(#x9
#xa
#xd
#x20
)))))
51 (let ((start (position-if-not #'whitespacep str
))
52 (end (position-if-not #'whitespacep str
:from-end t
))
54 (with-output-to-string (out)
56 (loop for i from start to end do
57 (let ((c (char str i
)))
62 (write-char #\space out
)))
65 (write-char c out
))))))))))
67 (defun normalize-uri (str)
68 (when (typep str
'puri
:uri
)
69 (setf str
(puri:render-uri str nil
)))
70 (setf str
(rod-to-utf8-string (rod str
)))
71 (with-output-to-string (out)
72 (loop for ch across str do
73 (let ((c (char-code ch
)))
75 (write-string (string-upcase (format nil
"%~2,'0X" c
)) out
)
76 (write-char ch out
))))))
78 (defun unwrap-publicid (str)
80 (with-output-to-string (out)
81 (let ((i (length "urn:publicid:"))
84 (let ((c (char str i
)))
86 (#\
+ (write-char #\space out
))
87 (#\
: (write-string "//" out
))
88 (#\
; (write-string "::" out))
95 (write-char (code-char code
) out
))
97 (t (write-char c out
))))
100 (defun match-exact (key table
&optional check-prefer
)
102 (destructuring-bind (from to
&optional prefer
) pair
103 (when (and (equal key from
) (or (not check-prefer
) (eq prefer
:public
)))
106 (defun match-prefix/rewrite
(key table
&optional check-prefer
)
110 (destructuring-bind (from to
&optional prefer
) pair
111 (when (and (or (not check-prefer
) (eq prefer
:public
))
112 (starts-with-p key from
)
113 (> (length from
) match-length
))
114 (setf match-length
(length from
))
119 (subseq key match-length
))
122 (defun match-prefix/sorted
(key table
&optional check-prefer
)
125 (destructuring-bind (from to
&optional prefer
) pair
126 (when (and (or (not check-prefer
) (eq prefer
:public
))
127 (starts-with-p key from
))
128 (push (cons (length from
) to
) result
))))
129 (mapcar #'cdr
(sort result
#'> :key
#'car
))))
131 (defun resolve-extid (public system catalog
)
132 (when public
(setf public
(normalize-public public
)))
133 (when system
(setf system
(normalize-uri system
)))
134 (when (and system
(starts-with-p system
"urn:publicid:"))
135 (let ((new-public (unwrap-publicid system
)))
136 (assert (or (null public
) (equal public new-public
)))
137 (setf public new-public
139 (let ((files (catalog-main-files catalog
))
142 (let ((file (pop files
))
144 (unless (typep file
'entry-file
)
145 (setf file
(find-catalog-file file catalog
)))
146 (unless (or (null file
) (member file seen
))
150 (or (match-exact system
(system-entries file
))
151 (match-prefix/rewrite
153 (rewrite-system-entries file
)))))
159 (delegate-system-entries file
)))))
160 (when (and public
(not delegates
))
161 (let* ((check-prefer (and system t
))
164 (public-entries file
)
171 (delegate-public-entries file
)
174 (setf files delegates
)
175 (setf files
(append (next-catalog-entries file
) files
))))))))
177 (defun resolve-uri (uri catalog
)
178 (setf uri
(normalize-uri uri
))
179 (when (starts-with-p uri
"urn:publicid:")
180 (return-from resolve-uri
181 (resolve-extid (unwrap-publicid uri
) nil catalog
)))
182 (let ((files (catalog-main-files catalog
))
185 (let ((file (pop files
)))
186 (unless (typep file
'entry-file
)
187 (setf file
(find-catalog-file file catalog
)))
188 (unless (or (null file
) (member file seen
))
191 (or (match-exact uri
(uri-entries file
))
192 (match-prefix/rewrite uri
(rewrite-uri-entries file
)))))
195 (let* ((delegate-entries
196 (delegate-uri-entries file
))
198 (match-prefix/sorted uri delegate-entries
)))
200 (setf files delegates
)
201 (setf files
(append (next-catalog-entries file
) files
))))))))))
203 (defun find-catalog-file (uri catalog
)
204 (setf uri
(if (stringp uri
) (safe-parse-uri uri
) uri
))
205 (let* ((*dtd-cache
* (catalog-dtd-cache catalog
))
207 (file (parse-catalog-file uri
)))
209 (let ((interned (puri:intern-uri uri
(catalog-file-table catalog
))))
210 (setf (getf (puri:uri-plist interned
) 'catalog
) file
)))
213 (defun make-catalog (&optional
(uris *default-catalog
*))
214 (let ((result (%make-catalog
)))
215 (setf (catalog-main-files result
)
218 for file
= (find-catalog-file uri result
)
219 when file collect file
))
222 (defun parse-catalog-file (uri)
224 (parse-catalog-file/strict uri
)
225 ((or file-error xml-parse-error
) (c)
226 (warn "ignoring catalog error: ~A" c
))))
228 (defparameter *catalog-dtd
*
230 (slot-value (asdf:find-system
:cxml
) 'asdf
::relative-pathname
))
231 (dtd (merge-pathnames "catalog.dtd" cxml
)))
232 (with-open-file (s dtd
:element-type
'(unsigned-byte 8))
234 (make-array (file-length s
) :element-type
'(unsigned-byte 8))))
235 (read-sequence bytes s
)
238 (defun parse-catalog-file/strict
(uri)
239 (let* ((*catalog
* nil
)
241 (puri:parse-uri
"http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd")))
242 (flet ((entity-resolver (public system
)
243 (declare (ignore public
))
244 (if (puri:uri
= system dtd-sysid
)
245 (make-octet-input-stream *catalog-dtd
*)
247 (with-open-stream (s (open (uri-to-pathname uri
)
248 :element-type
'(unsigned-byte 8)
251 (make-instance 'catalog-parser
:uri uri
)
253 :dtd
(make-extid nil dtd-sysid
)
255 :entity-resolver
#'entity-resolver
)))))
257 (defclass catalog-parser
()
258 ((result :initform
(make-entry-file) :accessor result
)
259 (next :initform
'() :accessor next
)
260 (prefer-stack :initform
(list *prefer
*) :accessor prefer-stack
)
261 (catalog-base-stack :accessor catalog-base-stack
)))
263 (defmethod initialize-instance :after
264 ((instance catalog-parser
) &key uri
)
265 (setf (catalog-base-stack instance
) (list uri
)))
267 (defmethod prefer ((handler catalog-parser
))
268 (car (prefer-stack handler
)))
270 (defmethod base ((handler catalog-parser
))
271 (car (catalog-base-stack handler
)))
273 (defun get-attribute/lname
(name attributes
)
274 (let ((a (find name attributes
276 (or (sax:attribute-local-name a
)
277 (sax:attribute-qname a
)))
279 (and a
(sax:attribute-value a
))))
281 (defmethod sax:start-element
((handler catalog-parser
) uri lname qname attrs
)
282 (declare (ignore uri
))
283 (setf lname
(or lname qname
))
284 ;; we can dispatch on lnames only because we validate against the DTD,
285 ;; which disallows other namespaces.
286 ;; FIXME: we don't, because we can't.
287 (push (let ((new (get-attribute/lname
"prefer" attrs
)))
289 ((equal new
"public") :public
)
290 ((equal new
"system") :system
)
291 ((null new
) (prefer handler
))))
292 (prefer-stack handler
))
293 (push (string-or (get-attribute/lname
"base" attrs
) (base handler
))
294 (catalog-base-stack handler
))
295 (flet ((geturi (lname)
297 (safe-parse-uri (get-attribute/lname lname attrs
))
300 ((string= lname
"public")
301 (push (list (normalize-public (get-attribute/lname
"publicId" attrs
))
304 (public-entries (result handler
))))
305 ((string= lname
"system")
306 (push (list (normalize-uri (get-attribute/lname
"systemId" attrs
))
308 (system-entries (result handler
))))
309 ((string= lname
"uri")
310 (push (list (normalize-uri (get-attribute/lname
"name" attrs
))
312 (uri-entries (result handler
))))
313 ((string= lname
"rewriteSystem")
314 (push (list (normalize-uri
315 (get-attribute/lname
"systemIdStartString" attrs
))
316 (get-attribute/lname
"rewritePrefix" attrs
))
317 (rewrite-system-entries (result handler
))))
318 ((string= lname
"rewriteURI")
319 (push (list (normalize-uri
320 (get-attribute/lname
"uriStartString" attrs
))
321 (get-attribute/lname
"rewritePrefix" attrs
))
322 (rewrite-uri-entries (result handler
))))
323 ((string= lname
"delegatePublic")
324 (push (list (normalize-public
325 (get-attribute/lname
"publicIdStartString" attrs
))
328 (delegate-public-entries (result handler
))))
329 ((string= lname
"delegateSystem")
330 (push (list (normalize-uri
331 (get-attribute/lname
"systemIdStartString" attrs
))
333 (delegate-system-entries (result handler
))))
334 ((string= lname
"delegateURI")
335 (push (list (normalize-uri
336 (get-attribute/lname
"uriStartString" attrs
))
338 (delegate-uri-entries (result handler
))))
339 ((string= lname
"nextCatalog")
340 (push (geturi "catalog")
341 (next-catalog-entries (result handler
)))))))
343 (defmethod sax:end-element
((handler catalog-parser
) uri lname qname
)
344 (declare (ignore uri lname qname
))
345 (pop (catalog-base-stack handler
))
346 (pop (prefer-stack handler
)))
348 (defmethod sax:end-document
((handler catalog-parser
))