1 ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions
3 ;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 (eval-when-compile (require 'subr-x
))
30 (defsubst dom-tag
(node)
31 "Return the NODE tag."
32 ;; Called on a list of nodes. Use the first.
33 (if (consp (car node
))
37 (defsubst dom-attributes
(node)
38 "Return the NODE attributes."
39 ;; Called on a list of nodes. Use the first.
40 (if (consp (car node
))
44 (defsubst dom-children
(node)
45 "Return the NODE children."
46 ;; Called on a list of nodes. Use the first.
47 (if (consp (car node
))
51 (defun dom-non-text-children (node)
52 "Return all non-text-node children of NODE."
53 (cl-loop for child in
(dom-children node
)
54 unless
(stringp child
)
57 (defun dom-set-attributes (node attributes
)
58 "Set the attributes of NODE to ATTRIBUTES."
59 (setq node
(dom-ensure-node node
))
60 (setcar (cdr node
) attributes
))
62 (defun dom-set-attribute (node attribute value
)
63 "Set ATTRIBUTE in NODE to VALUE."
64 (setq node
(dom-ensure-node node
))
65 (let ((old (assoc attribute
(cadr node
))))
68 (setcar (cdr node
) (nconc (cadr node
) (list (cons attribute value
)))))))
70 (defmacro dom-attr
(node attr
)
71 "Return the attribute ATTR from NODE.
72 A typical attribute is `href'."
73 `(cdr (assq ,attr
(dom-attributes ,node
))))
75 (defun dom-text (node)
76 "Return all the text bits in the current node concatenated."
77 (mapconcat 'identity
(cl-remove-if-not 'stringp
(dom-children node
)) " "))
79 (defun dom-texts (node &optional separator
)
80 "Return all textual data under NODE concatenated with SEPARATOR in-between."
87 (dom-texts elem separator
)))
91 (defun dom-child-by-tag (dom tag
)
92 "Return the first child of DOM that is of type TAG."
93 (assoc tag
(dom-children dom
)))
95 (defun dom-by-tag (dom tag
)
96 "Return elements in DOM that is of type TAG.
97 A name is a symbol like `td'."
98 (let ((matches (cl-loop for child in
(dom-children dom
)
99 for matches
= (and (not (stringp child
))
100 (dom-by-tag child tag
))
103 (if (equal (dom-tag dom
) tag
)
107 (defun dom-strings (dom)
108 "Return elements in DOM that are strings."
109 (cl-loop for child in
(dom-children dom
)
113 append
(dom-strings child
)))
115 (defun dom-by-class (dom match
)
116 "Return elements in DOM that have a class name that matches regexp MATCH."
117 (dom-elements dom
'class match
))
119 (defun dom-by-style (dom match
)
120 "Return elements in DOM that have a style that matches regexp MATCH."
121 (dom-elements dom
'style match
))
123 (defun dom-by-id (dom match
)
124 "Return elements in DOM that have an ID that matches regexp MATCH."
125 (dom-elements dom
'id match
))
127 (defun dom-elements (dom attribute match
)
128 "Find elements matching MATCH (a regexp) in ATTRIBUTE.
129 ATTRIBUTE would typically be `class', `id' or the like."
130 (let ((matches (cl-loop for child in
(dom-children dom
)
131 for matches
= (and (not (stringp child
))
132 (dom-elements child attribute
136 (attr (dom-attr dom attribute
)))
138 (string-match match attr
))
142 (defun dom-parent (dom node
)
143 "Return the parent of NODE in DOM."
144 (if (memq node
(dom-children dom
))
147 (dolist (elem (dom-children dom
))
148 (when (and (not result
)
149 (not (stringp elem
)))
150 (setq result
(dom-parent elem node
))))
153 (defun dom-previous-sibling (dom node
)
154 (when-let (parent (dom-parent dom node
))
155 (let ((siblings (dom-children parent
))
158 (when (eq (cadr siblings
) node
)
159 (setq previous
(car siblings
)))
163 (defun dom-node (tag &optional attributes
&rest children
)
164 "Return a DOM node with TAG and ATTRIBUTES."
166 `(,tag
,attributes
,@children
)
167 (list tag attributes
)))
169 (defun dom-append-child (node child
)
170 "Append CHILD to the end of NODE's children."
171 (setq node
(dom-ensure-node node
))
172 (nconc node
(list child
)))
174 (defun dom-add-child-before (node child
&optional before
)
175 "Add CHILD to NODE's children before child BEFORE.
176 If BEFORE is nil, make CHILD NODE's first child."
177 (setq node
(dom-ensure-node node
))
178 (let ((children (dom-children node
)))
180 (not (memq before children
)))
181 (error "%s does not exist as a child" before
))
182 (let ((pos (if before
183 (cl-position before children
)
187 (setcdr (cdr node
) (cons child
(cddr node
)))
188 (setcdr (nthcdr (1- pos
) children
)
189 (cons child
(nthcdr pos children
))))))
192 (defun dom-ensure-node (node)
193 "Ensure that NODE is a proper DOM node."
194 ;; Add empty attributes, if none.
195 (when (consp (car node
))
196 (setq node
(car node
)))
197 (when (= (length node
) 1)
198 (setcdr node
(list nil
)))
201 (defun dom-pp (dom &optional remove-empty
)
202 "Pretty-print DOM at point.
203 If REMOVE-EMPTY, ignore textual nodes that contain just
205 (let ((column (current-column)))
206 (insert (format "(%S " (dom-tag dom
)))
207 (let* ((attr (dom-attributes dom
))
208 (times (length attr
))
209 (column (1+ (current-column))))
214 (insert (format "(%S . %S)" (car elem
) (cdr elem
)))
215 (if (zerop (cl-decf times
))
217 (insert "\n" (make-string column ?
))))))
218 (let* ((children (if remove-empty
222 (string-match "\\`[\n\r\t ]*\\'" child
)))
225 (times (length children
)))
228 (insert "\n" (make-string (1+ column
) ?
))
229 (dolist (child children
)
231 (if (or (not remove-empty
)
232 (not (string-match "\\`[\n\r\t ]*\\'" child
)))
233 (insert (format "%S" child
)))
234 (dom-pp child remove-empty
))
235 (if (zerop (cl-decf times
))
237 (insert "\n" (make-string (1+ column
) ?
))))))))