1 ;;; nxml-parse.el --- XML parser, sharing infrastructure with nxml-mode
3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 3, or (at your option)
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02110-1301, USA.
27 ;; Entry point is `nxml-parse-file'.
(defvar nxml-parse-file-name nil
  "Name of the file currently being parsed, or nil when no parse is running.
`nxml-parse-file' binds this to its FILE argument around the parse.
`nxml-parse-error' and `nxml-check-xmltok-errors' pass the value to
`nxml-signal-file-parse-error' so that a signaled error identifies the
file it came from.")
;; Validation hook.  When non-nil, `nxml-validate-tag' calls it with
;; `funcall', passing the pending text and the parsed start-tag (or nil
;; for an end-tag), and turns a non-nil result into a parse error.
;; NOTE(review): the docstring below appears to have lost lines in this
;; copy: the list of LOCATION values starts without any lead-in, and the
;; `text' item stops at "allowed only if".  The reference to
;; `nxml-parse-file in the first paragraph is also missing its closing
;; quote.  Confirm against the full file before relying on it.
38 (defvar nxml-validate-function nil
39 "Nil or a function to be called by `nxml-parse-file' to perform validation.
40 The function will be called once for each start-tag or end-tag. The
41 function is passed two arguments TEXT and START-TAG. For a start-tag,
42 START-TAG is a list (NAME ATTRIBUTES) where NAME and ATTRIBUTES are in
43 the same form as returned by `nxml-parse-file. For an end-tag,
44 START-TAG is nil. TEXT is a string containing the text immediately
45 preceding the tag, or nil if there was no such text. An empty element
46 is treated as a start-tag followed by an end-tag.
48 For a start-tag, the namespace state will be the state after
49 processing the namespace declarations in the start-tag. For an
50 end-tag, the namespace state will be the state before popping the
51 namespace declarations for the corresponding start-tag.
53 The function must return nil if no error is detected or a
54 cons (MESSAGE . LOCATION) where MESSAGE is a string containing
55 an error message and LOCATION indicates what caused the error
58 - nil indicates the tag as whole caused it; this is always allowed;
60 - text indicates the text caused it; this is allowed only if
63 - tag-close indicates the close of the tag caused it; this is
64 allowed only if START-TAG is non-nil;
66 - (attribute-name . N) indicates that the name of the Nth attribute
67 caused it; N counts from 0; this is allowed only if START-TAG is non-nil
68 and N must be less than the number of attributes;
70 - (attribute-value . N) indicates that the value of the Nth attribute
71 caused it; N counts from 0; this is allowed only if START-TAG is non-nil
72 and N must be less than the number of attributes.")
;; Public entry point: parse the XML document stored in FILE.
;; Visible steps: make the buffer returned by `nxml-parse-find-file'
;; current, then run `nxml-parse-instance' with `nxml-parse-file-name'
;; dynamically bound to FILE so that errors can name the file.
;; NOTE(review): this copy looks truncated -- the `defun' is never closed
;; after the `let', and nothing visible here restores the previous buffer
;; or disposes of the parse buffer.  Confirm against the full file.
;; NOTE(review): the docstring mentions `nxml-validation-function', but
;; the variable defined in this file is `nxml-validate-function'.
74 (defun nxml-parse-file (file)
75 "Parse the XML document in FILE and return it as a list.
76 An XML element is represented as a list (NAME ATTRIBUTES . CHILDREN).
77 NAME is either a string, in the case where the name does not have a
78 namespace, or a cons (NAMESPACE . LOCAL-NAME), where NAMESPACE is a
79 symbol and LOCAL-NAME is a string, in the case where the name does
80 have a namespace. NAMESPACE is a keyword whose name is `:URI', where
81 URI is the namespace name. ATTRIBUTES is an alist of attributes where
82 each attribute has the form (NAME . VALUE), where NAME has the same
83 form as an element name, and VALUE is a string. A namespace
84 declaration is represented as an attribute whose name is
85 \(:http://www.w3.org/2000/xmlns/ . LOCAL-NAME). CHILDREN is a list
86 containing strings and child elements; CHILDREN never contains two
87 consecutive strings and never contains an empty string. Processing
88 instructions and comments are not represented. The return value is a
89 list representing the document element.
91 If the XML document is not well-formed, an error having the condition
92 `nxml-file-parse-error' will be signaled; the error data will be a
93 list of the \(FILE POSITION MESSAGE), where POSITION is an integer
94 specifying the position where the error was detected, and MESSAGE is a
95 string describing the error.
97 The current contents of FILE will be parsed even if there is a
98 modified buffer currently visiting FILE.
100 If the variable `nxml-validation-function' is non-nil, it will be
101 called twice for each element, and any reported error will be signaled
102 in the same way as well-formedness error."
;; Switch to the buffer that holds the contents of FILE.
104 (set-buffer (nxml-parse-find-file file
))
;; Bind the file name dynamically; `nxml-parse-error' and
;; `nxml-check-xmltok-errors' read it when signaling.
106 (let ((nxml-parse-file-name file
))
107 (nxml-parse-instance))
;; Load FILE into the internal buffer " *nXML Parse*", creating the buffer
;; on demand.
;; NOTE(review): `nxml-parse-file' passes this function's result to
;; `set-buffer', but no return-value form is visible here and the `defun'
;; is not closed; lines appear to be missing from this copy.  Nothing
;; visible clears earlier buffer contents either -- confirm against the
;; full file.
110 (defun nxml-parse-find-file (file)
112 (set-buffer (get-buffer-create " *nXML Parse*"))
;; `set-auto-coding-function' is bound to `nxml-set-xml-coding' for the
;; duration of the insert -- presumably so the file's encoding is detected
;; by XML rules; TODO confirm against `nxml-set-xml-coding'.
114 (let ((set-auto-coding-function 'nxml-set-xml-coding
))
115 (insert-file-contents file
))
;; Drive one parse: consume the prolog with `xmltok-forward-prolog',
;; signal any tokenizer error via `nxml-check-xmltok-errors', then hand
;; over to `nxml-parse-instance-1' for the document body.
;; NOTE(review): the last line closes three more forms than are opened in
;; the visible text, so enclosing forms (bindings or save/restore wrappers)
;; are missing from this copy; confirm against the full file.
118 (defun nxml-parse-instance ()
121 (xmltok-forward-prolog)
122 (nxml-check-xmltok-errors)
124 (nxml-parse-instance-1)))))
;; Main token loop.  Walks the document with `xmltok-forward' and builds
;; the element tree by destructively appending to cons cells (`tail').
;; Open elements are tracked by QName in `open-element-tags'; character
;; data is buffered in `text' and flushed when the next tag is seen.
;; NOTE(review): many lines of this definition are not visible in this
;; copy (most `let*' bindings, several guards, and whatever form produces
;; the return value), so the comments below describe only what is shown.
126 (defun nxml-parse-instance-1 ()
;; `top' starts as a fresh (nil) cell -- presumably the dummy head that the
;; parsed content is appended to; the binding that ties `tail' to it is
;; not visible here.
127 (let* ((top (cons nil nil
))
128 ;; tail is a cons cell, whose cdr is nil
129 ;; additional elements will be destructively appended to tail
131 ;; stack of tails, one for each open element
133 ;; list of QNames of open elements
135 ;; list of strings buffering a text node, in reverse order
137 ;; position of beginning of first (in buffer) string in text
;; One iteration per token; the loop ends when `xmltok-forward' returns nil.
;; Tokenizer errors are checked after every token.
139 (while (xmltok-forward)
140 (nxml-check-xmltok-errors)
;; Case 1: the token is a tag of some kind.
141 (cond ((memq xmltok-type
'(start-tag end-tag empty-element
))
;; Flush buffered text: the pieces were collected in reverse order, so
;; reverse and concatenate them, then append the resulting string as a
;; child at `tail'.
;; NOTE(review): any guard for "no text buffered" is not visible here.
143 (setq text
(apply 'concat
(nreverse text
)))
144 (setcdr tail
(cons text nil
))
145 (setq tail
(cdr tail
)))
;; Start-tag or empty element: a new element opens.
146 (when (not (eq xmltok-type
'end-tag
))
;; An element opening while no element is open may be a second root.
;; NOTE(review): the remainder of this `and' condition is not visible.
147 (when (and (not open-element-tags
)
149 (nxml-parse-error nil
"Multiple top-level elements"))
;; Push this element's QName so that the matching end-tag can be checked.
150 (setq open-element-tags
151 (cons (xmltok-start-tag-qname)
;; Parse name and attributes, let the validator see the start-tag, then
;; append the new element as a child of the current one.
154 (let ((tag (nxml-parse-start-tag)))
155 (nxml-validate-tag text text-pos tag
)
157 (setcdr tail
(cons tag nil
))
158 (setq tail
(cdr tail
))
;; Descend: remember where the parent's child list continues, and make the
;; last cell of the new element the place where its children get appended.
159 (setq tail-stack
(cons tail tail-stack
))
160 (setq tail
(last tag
))))
;; End-tag or empty element: the current element closes.
161 (when (not (eq xmltok-type
'start-tag
))
;; An empty element closes itself; a real end-tag must carry the QName of
;; the innermost open element, otherwise one of two errors is signaled.
162 (or (eq xmltok-type
'empty-element
)
163 (equal (car open-element-tags
)
164 (xmltok-end-tag-qname))
165 (if open-element-tags
166 (nxml-parse-error nil
167 "Unbalanced end-tag; expected </%s>"
168 (car open-element-tags
))
169 (nxml-parse-error nil
"Extra end-tag")))
;; Give the validator the end-tag event (START-TAG argument is nil).
170 (nxml-validate-tag text text-pos nil
)
;; Pop the element and resume appending at the parent's tail.
173 (setq open-element-tags
(cdr open-element-tags
))
174 (setq tail
(car tail-stack
))
175 (setq tail-stack
(cdr tail-stack
)))
;; Case 2: the token is character content of some kind.
177 ((memq xmltok-type
'(space data entity-ref char-ref cdata-section
))
;; Inside an element: note where the text run starts and buffer this piece.
;; NOTE(review): the guard around the `text-pos' assignment and the `setq'
;; that receives the `cons' below are not visible in this copy.
178 (cond (open-element-tags
180 (setq text-pos xmltok-start
))
182 (cons (nxml-current-text-string) text
)))
;; Outside any element, only `space' tokens pass this test.  The alist
;; maps the other token types to a human-readable label.
;; NOTE(review): the call that consumes the label is not visible here.
183 ((not (eq xmltok-type
'space
))
187 (cdr (assq xmltok-type
188 '((data .
"Text characters")
189 (entity-ref .
"Entity reference")
190 (char-ref .
"Character reference")
191 (cdata-section .
"CDATA section"))))))))))
;; Reports a document without a root element at the end of the buffer.
;; NOTE(review): the condition guarding this call is not visible here.
193 (nxml-parse-error (point-max) "Missing document element"))
;; Build the list (NAME ATTRIBUTES) for the current start-tag from the
;; tokenizer state (`xmltok-namespace-attributes', `xmltok-attributes',
;; `xmltok-start-tag-prefix', ...).  Namespace declarations are processed
;; first so that the element and attribute names can be resolved.
;; NOTE(review): the loop headers and several `cond'/`if' tests are not
;; visible in this copy; the comments below describe only what is shown.
196 (defun nxml-parse-start-tag ()
197 (let (parsed-attributes
198 parsed-namespace-attributes
199 atts att prefixes prefix ns value name
)
;; Pass 1: namespace declaration attributes.
200 (setq atts xmltok-namespace-attributes
)
202 (setq att
(car atts
))
;; A declaration whose value the tokenizer could not produce is an error.
203 (setq value
(or (xmltok-attribute-value att
)
204 (nxml-parse-error nil
"Invalid attribute value")))
205 (setq ns
(nxml-make-namespace value
))
;; `prefix' is nil when the attribute itself has no prefix; otherwise it is
;; the attribute's local name.
206 (setq prefix
(and (xmltok-attribute-prefix att
)
207 (xmltok-attribute-local-name att
)))
;; The same prefix (or the default) must not be declared twice in one tag.
;; NOTE(review): the tests of the two middle clauses are not visible here.
208 (cond ((member prefix prefixes
)
209 (nxml-parse-error nil
"Duplicate namespace declaration"))
211 (nxml-ns-set-default ns
))
213 (nxml-ns-set-prefix prefix ns
))
214 (t (nxml-parse-error nil
"Cannot undeclare namespace prefix")))
215 (setq prefixes
(cons prefix prefixes
))
;; Record the declaration as an attribute named in the xmlns namespace.
;; NOTE(review): the value half of the pair is not visible in this copy.
216 (setq parsed-namespace-attributes
217 (cons (cons (nxml-make-name nxml-xmlns-namespace-uri
218 (xmltok-attribute-local-name att
))
220 parsed-namespace-attributes
))
221 (setq atts
(cdr atts
)))
;; Resolve the element name.  An unknown prefix is reported one character
;; after the start of the tag; otherwise the default namespace is used.
;; NOTE(review): the enclosing `setq'/`if' forms are not visible here.
224 (let ((prefix (xmltok-start-tag-prefix)))
226 (or (nxml-ns-get-prefix prefix
)
227 (nxml-parse-error (1+ xmltok-start
)
228 "Prefix `%s' undeclared"
230 (nxml-ns-get-default)))
231 (xmltok-start-tag-local-name)))
;; Pass 2: ordinary attributes.
232 (setq atts xmltok-attributes
)
234 (setq att
(car atts
))
;; Resolve the attribute's prefix; an unknown prefix is reported at the
;; start of the attribute name.
236 (let ((prefix (xmltok-attribute-prefix att
)))
238 (or (nxml-ns-get-prefix prefix
)
239 (nxml-parse-error (xmltok-attribute-name-start att
)
240 "Prefix `%s' undeclared"
;; Duplicates are detected on the expanded name (namespace + local name),
;; compared with `assoc', i.e. by `equal'.
242 (setq parsed-attributes
243 (let ((nm (nxml-make-name ns
244 (xmltok-attribute-local-name att
))))
245 (when (assoc nm parsed-attributes
)
246 (nxml-parse-error (xmltok-attribute-name-start att
)
247 "Duplicate attribute"))
248 (cons (cons nm
(or (xmltok-attribute-value att
)
249 (nxml-parse-error nil
"Invalid attribute value")))
251 (setq atts
(cdr atts
)))
252 ;; We want to end up with the attributes followed by the
253 ;; namespace attributes, in the same order as
254 ;; xmltok-attributes and xmltok-namespace-attributes respectively.
;; The namespace attributes were consed up front-first, so they are in
;; reverse order; putting them in front of `parsed-attributes' and then
;; reversing the combined list is what yields the order described above.
255 (when parsed-namespace-attributes
256 (setq parsed-attributes
257 (nconc parsed-namespace-attributes parsed-attributes
)))
258 (list name
(nreverse parsed-attributes
))))
;; Run the validation hook for one tag event, if a hook is installed.
;; TEXT is the text buffered before the tag, TEXT-POS its buffer position,
;; and TAG the parsed start-tag, or nil for an end-tag (see the two call
;; sites in `nxml-parse-instance-1').  A non-nil result is expected to be
;; (MESSAGE . LOCATION) and is turned into a parse error.
;; NOTE(review): lines are missing from this copy: the binding of `pos',
;; the test on `err', and the remaining arguments of
;; `nxml-validate-error-position' (which takes three) are not visible.
260 (defun nxml-validate-tag (text text-pos tag
)
261 (when nxml-validate-function
262 (let ((err (funcall nxml-validate-function text tag
))
;; Translate the LOCATION part of the result into a buffer position.
265 (setq pos
(nxml-validate-error-position (cdr err
)
;; A nil position means the hook returned a LOCATION that could not be
;; resolved; that is a bug in the hook, so use a plain `error'.
268 (or pos
(error "Incorrect return value from %s"
269 nxml-validate-function
))
;; NOTE(review): MESSAGE is passed as the first of ARGS, so
;; `nxml-parse-error' uses it as a `format' control string; a message
;; containing `%' would be misinterpreted.  Consider passing "%s" first.
270 (nxml-parse-error pos
(car err
))))))
;; Translate a LOCATION reported by the validation hook into a buffer
;; position.  TEXT-POS is the position of the text preceding the tag and
;; TAG is the parsed start-tag (nil for an end-tag).
;; NOTE(review): the tests that select between the attribute branches are
;; on lines not visible in this copy.
272 (defun nxml-validate-error-position (location text-pos tag
)
;; nil: the tag as a whole, reported at the start of the current token.
273 (cond ((null location
) xmltok-start
)
;; `text': the position recorded for the preceding text.
274 ((eq location
'text
) text-pos
)
;; `tag-close': 2 characters before point for an empty element, otherwise
;; 1 -- presumably the length of the closing delimiter; TODO confirm.
;; Yields nil when TAG is nil.
275 ((eq location
'tag-close
)
276 (and tag
(- (point) (if (eq xmltok-type
'empty-element
) 2 1))))
;; (KIND . N): look up the Nth attribute, first among the ordinary
;; attributes.
278 (let ((att (nth (cdr location
) xmltok-attributes
)))
;; Fall back to the namespace attributes, indexing past the ordinary ones.
;; NOTE(review): the condition guarding this fallback is not visible.
280 (setq att
(nth (- (cdr location
) (length xmltok-attributes
))
281 xmltok-namespace-attributes
)))
;; Report either the start of the attribute's name or of its value.
283 ((eq (car location
) 'attribute-name
)
284 (xmltok-attribute-name-start att
))
285 ((eq (car location
) 'attribute-value
)
286 (xmltok-attribute-value-start att
)))))))
;; Combine a namespace NS and a LOCAL-NAME into a name object.  Callers in
;; this file pass the result of `nxml-ns-get-prefix' / `nxml-ns-get-default'
;; (or `nxml-xmlns-namespace-uri') as NS and a local-name string.
;; NOTE(review): only the signature is visible in this copy; the body and
;; the closing paren of the `defun' are missing, so the representation of
;; the result cannot be confirmed from here.
288 (defun nxml-make-name (ns local-name
)
;; Return the text contributed by the current token, chosen by
;; `xmltok-type'.
;; NOTE(review): the end-position arguments of both
;; `buffer-substring-no-properties' calls are not visible in this copy.
293 (defun nxml-current-text-string ()
;; Whitespace and plain data: taken from the buffer, starting at the token.
294 (cond ((memq xmltok-type
'(space data
))
295 (buffer-substring-no-properties xmltok-start
;; CDATA section: starts 9 characters into the token -- presumably
;; skipping the opening delimiter; TODO confirm.
297 ((eq xmltok-type
'cdata-section
)
298 (buffer-substring-no-properties (+ xmltok-start
9)
;; References: use the replacement computed by the tokenizer; a missing
;; replacement is an error whose wording depends on the reference kind.
300 ((memq xmltok-type
'(char-ref entity-ref
))
301 (unless xmltok-replacement
302 (nxml-parse-error nil
303 (if (eq xmltok-type
'char-ref
)
304 "Reference to unsupported Unicode character"
305 "Unresolvable entity reference")))
306 xmltok-replacement
)))
(defun nxml-parse-error (position &rest args)
  "Signal a parse error for the file named by `nxml-parse-file-name'.
POSITION is the buffer position to report; when it is nil, the value of
`xmltok-start' is reported instead.  ARGS is a format control string
followed by its arguments, as for `format'; the formatted text becomes
the error message passed to `nxml-signal-file-parse-error'."
  (let* ((file nxml-parse-file-name)
         (where (or position xmltok-start))
         (description (apply #'format args)))
    (nxml-signal-file-parse-error file where description)))
;; Convert an error recorded by the tokenizer in `xmltok-errors' into a
;; file parse error for `nxml-parse-file-name'.  The last element of the
;; list is the one reported -- presumably the earliest error if the
;; tokenizer pushes new errors on the front; TODO confirm.
;; NOTE(review): the final line closes one more form than is opened in
;; the visible text, so an enclosing form (most likely a guard on
;; `xmltok-errors' being non-nil) is missing from this copy.
313 (defun nxml-check-xmltok-errors ()
315 (let ((err (car (last xmltok-errors
))))
316 (nxml-signal-file-parse-error nxml-parse-file-name
317 (xmltok-error-start err
)
318 (xmltok-error-message err
)))))
;; Announce the feature so that (require 'nxml-parse) succeeds.
(provide 'nxml-parse)
322 ;; arch-tag: fc19639b-1bff-4673-9992-f539da89ba1e
323 ;;; nxml-parse.el ends here