1 ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions
3 ;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 (defsubst dom-tag
(node)
30 "Return the NODE tag."
31 ;; Called on a list of nodes. Use the first.
32 (if (consp (car node
))
36 (defsubst dom-attributes
(node)
37 "Return the NODE attributes."
38 ;; Called on a list of nodes. Use the first.
39 (if (consp (car node
))
43 (defsubst dom-children
(node)
44 "Return the NODE children."
45 ;; Called on a list of nodes. Use the first.
46 (if (consp (car node
))
50 (defun dom-non-text-children (node)
51 "Return all non-text-node children of NODE."
52 (cl-loop for child in
(dom-children node
)
53 unless
(stringp child
)
(defun dom-set-attributes (node attributes)
  "Replace the attribute list of NODE with ATTRIBUTES.
NODE is first passed through `dom-ensure-node'; the second cell of
the resulting node is then overwritten in place.  The value is
ATTRIBUTES, as returned by `setcar'."
  (let ((normalized (dom-ensure-node node)))
    ;; The attribute list is stored in the cadr of a node.
    (setcar (cdr normalized) attributes)))
61 (defun dom-set-attribute (node attribute value
)
62 "Set ATTRIBUTE in NODE to VALUE."
63 (setq node
(dom-ensure-node node
))
64 (let ((old (assoc attribute
(cadr node
))))
67 (setcar (cdr node
) (nconc (cadr node
) (list (cons attribute value
)))))))
(defmacro dom-attr (node attr)
  "Expand to a lookup of attribute ATTR in the attributes of NODE.
The expansion calls `dom-attributes' on NODE, searches the result
for ATTR with `assq', and yields the `cdr' of the matching entry
\(nil when there is no such entry).  ATTR is typically a symbol
such as `href'."
  ;; Built with `list' instead of backquote; the resulting form is
  ;; (cdr (assq ATTR (dom-attributes NODE))).
  (list 'cdr
        (list 'assq attr
              (list 'dom-attributes node))))
(defun dom-text (node)
  "Return the string children of NODE joined by single spaces.
Only those elements of (dom-children NODE) that satisfy `stringp'
are used, in their original order; everything else is skipped.
The value is the empty string when there are no such elements."
  (let (pieces)
    ;; Collect the strings in reverse, then restore the order.
    (dolist (child (dom-children node))
      (when (stringp child)
        (push child pieces)))
    (mapconcat #'identity (nreverse pieces) " ")))
78 (defun dom-texts (node &optional separator
)
79 "Return all textual data under NODE concatenated with SEPARATOR in-between."
86 (dom-texts elem separator
)))
(defun dom-child-by-tag (dom tag)
  "Return the first child of DOM whose tag is TAG, or nil if none.
The children are obtained with `dom-children' and searched with
`assoc', so TAG is compared against the `car' of each child using
`equal', and children that are not conses are ignored."
  (let ((kids (dom-children dom)))
    (assoc tag kids)))
94 (defun dom-by-tag (dom tag
)
95 "Return elements in DOM that is of type TAG.
96 A name is a symbol like `td'."
97 (let ((matches (cl-loop for child in
(dom-children dom
)
98 for matches
= (and (not (stringp child
))
99 (dom-by-tag child tag
))
102 (if (equal (dom-tag dom
) tag
)
106 (defun dom-strings (dom)
107 "Return elements in DOM that are strings."
108 (cl-loop for child in
(dom-children dom
)
112 append
(dom-strings child
)))
(defun dom-by-class (dom match)
  "Return the elements of DOM whose `class' attribute matches MATCH.
MATCH is a regexp.  The search itself is done by `dom-elements',
called with the attribute name `class'."
  (dom-elements dom 'class match))
(defun dom-by-style (dom match)
  "Return the elements of DOM whose `style' attribute matches MATCH.
MATCH is a regexp.  The search itself is done by `dom-elements',
called with the attribute name `style'."
  (dom-elements dom 'style match))
(defun dom-by-id (dom match)
  "Return the elements of DOM whose `id' attribute matches MATCH.
MATCH is a regexp.  The search itself is done by `dom-elements',
called with the attribute name `id'."
  (dom-elements dom 'id match))
126 (defun dom-elements (dom attribute match
)
127 "Find elements matching MATCH (a regexp) in ATTRIBUTE.
128 ATTRIBUTE would typically be `class', `id' or the like."
129 (let ((matches (cl-loop for child in
(dom-children dom
)
130 for matches
= (and (not (stringp child
))
131 (dom-elements child attribute
135 (attr (dom-attr dom attribute
)))
137 (string-match match attr
))
141 (defun dom-parent (dom node
)
142 "Return the parent of NODE in DOM."
143 (if (memq node
(dom-children dom
))
146 (dolist (elem (dom-children dom
))
147 (when (and (not result
)
148 (not (stringp elem
)))
149 (setq result
(dom-parent elem node
))))
152 (defun dom-previous-sibling (dom node
)
153 (when-let (parent (dom-parent dom node
))
154 (let ((siblings (dom-children parent
))
157 (when (eq (cadr siblings
) node
)
158 (setq previous
(car siblings
)))
162 (defun dom-node (tag &optional attributes
&rest children
)
163 "Return a DOM node with TAG and ATTRIBUTES."
165 `(,tag
,attributes
,@children
)
166 (list tag attributes
)))
(defun dom-append-child (node child)
  "Add CHILD as the last element of NODE.
NODE is first passed through `dom-ensure-node'; the resulting
list is then extended destructively with `nconc', and the value
of that `nconc' call is returned."
  (let ((target (dom-ensure-node node)))
    ;; Wrap CHILD in a fresh one-element list so that only NODE's
    ;; own list structure is modified.
    (nconc target (list child))))
173 (defun dom-add-child-before (node child
&optional before
)
174 "Add CHILD to NODE's children before child BEFORE.
175 If BEFORE is nil, make CHILD NODE's first child."
176 (setq node
(dom-ensure-node node
))
177 (let ((children (dom-children node
)))
179 (not (memq before children
)))
180 (error "%s does not exist as a child" before
))
181 (let ((pos (if before
182 (cl-position before children
)
186 (setcdr (cdr node
) (cons child
(cddr node
)))
187 (setcdr (nthcdr (1- pos
) children
)
188 (cons child
(nthcdr pos children
))))))
191 (defun dom-ensure-node (node)
192 "Ensure that NODE is a proper DOM node."
193 ;; Add empty attributes, if none.
194 (when (consp (car node
))
195 (setq node
(car node
)))
196 (when (= (length node
) 1)
197 (setcdr node
(list nil
)))
200 (defun dom-pp (dom &optional remove-empty
)
201 "Pretty-print DOM at point.
202 If REMOVE-EMPTY, ignore textual nodes that contain just
204 (let ((column (current-column)))
205 (insert (format "(%S " (dom-tag dom
)))
206 (let* ((attr (dom-attributes dom
))
207 (times (length attr
))
208 (column (1+ (current-column))))
213 (insert (format "(%S . %S)" (car elem
) (cdr elem
)))
214 (if (zerop (cl-decf times
))
216 (insert "\n" (make-string column ?
))))))
217 (let* ((children (if remove-empty
221 (string-match "\\`[\n\r\t ]*\\'" child
)))
224 (times (length children
)))
227 (insert "\n" (make-string (1+ column
) ?
))
228 (dolist (child children
)
230 (if (or (not remove-empty
)
231 (not (string-match "\\`[\n\r\t ]*\\'" child
)))
232 (insert (format "%S" child
)))
233 (dom-pp child remove-empty
))
234 (if (zerop (cl-decf times
))
236 (insert "\n" (make-string (1+ column
) ?
))))))))