1 ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions -*- lexical-binding: t -*-
3 ;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
28 (eval-when-compile (require 'subr-x
))
30 (defsubst dom-tag
(node)
31 "Return the NODE tag."
32 ;; Called on a list of nodes. Use the first.
33 (if (consp (car node
))
37 (defsubst dom-attributes
(node)
38 "Return the NODE attributes."
39 ;; Called on a list of nodes. Use the first.
40 (if (consp (car node
))
44 (defsubst dom-children
(node)
45 "Return the NODE children."
46 ;; Called on a list of nodes. Use the first.
47 (if (consp (car node
))
51 (defun dom-non-text-children (node)
52 "Return all non-text-node children of NODE."
53 (cl-loop for child in
(dom-children node
)
54 unless
(stringp child
)
57 (defun dom-set-attributes (node attributes
)
58 "Set the attributes of NODE to ATTRIBUTES."
59 (setq node
(dom-ensure-node node
))
60 (setcar (cdr node
) attributes
))
62 (defun dom-set-attribute (node attribute value
)
63 "Set ATTRIBUTE in NODE to VALUE."
64 (setq node
(dom-ensure-node node
))
65 (let ((old (assoc attribute
(cadr node
))))
68 (setcar (cdr node
) (nconc (cadr node
) (list (cons attribute value
)))))))
70 (defmacro dom-attr
(node attr
)
71 "Return the attribute ATTR from NODE.
72 A typical attribute is `href'."
73 `(cdr (assq ,attr
(dom-attributes ,node
))))
75 (defun dom-text (node)
76 "Return all the text bits in the current node concatenated."
77 (mapconcat 'identity
(cl-remove-if-not 'stringp
(dom-children node
)) " "))
79 (defun dom-texts (node &optional separator
)
80 "Return all textual data under NODE concatenated with SEPARATOR in-between."
81 (if (eq (dom-tag node
) 'script
)
90 ((eq (dom-tag elem
) 'script
)
93 (dom-texts elem separator
))))
97 (defun dom-child-by-tag (dom tag
)
98 "Return the first child of DOM that is of type TAG."
99 (assoc tag
(dom-children dom
)))
101 (defun dom-by-tag (dom tag
)
102 "Return elements in DOM that is of type TAG.
103 A name is a symbol like `td'."
104 (let ((matches (cl-loop for child in
(dom-children dom
)
105 for matches
= (and (not (stringp child
))
106 (dom-by-tag child tag
))
109 (if (equal (dom-tag dom
) tag
)
113 (defun dom-strings (dom)
114 "Return elements in DOM that are strings."
115 (cl-loop for child in
(dom-children dom
)
119 append
(dom-strings child
)))
121 (defun dom-by-class (dom match
)
122 "Return elements in DOM that have a class name that matches regexp MATCH."
123 (dom-elements dom
'class match
))
125 (defun dom-by-style (dom match
)
126 "Return elements in DOM that have a style that matches regexp MATCH."
127 (dom-elements dom
'style match
))
129 (defun dom-by-id (dom match
)
130 "Return elements in DOM that have an ID that matches regexp MATCH."
131 (dom-elements dom
'id match
))
133 (defun dom-elements (dom attribute match
)
134 "Find elements matching MATCH (a regexp) in ATTRIBUTE.
135 ATTRIBUTE would typically be `class', `id' or the like."
136 (let ((matches (cl-loop for child in
(dom-children dom
)
137 for matches
= (and (not (stringp child
))
138 (dom-elements child attribute
142 (attr (dom-attr dom attribute
)))
144 (string-match match attr
))
148 (defun dom-remove-node (dom node
)
149 "Remove NODE from DOM."
150 ;; If we're removing the top level node, just return nil.
151 (dolist (child (dom-children dom
))
155 ((not (stringp child
))
156 (dom-remove-node child node
)))))
158 (defun dom-parent (dom node
)
159 "Return the parent of NODE in DOM."
160 (if (memq node
(dom-children dom
))
163 (dolist (elem (dom-children dom
))
164 (when (and (not result
)
165 (not (stringp elem
)))
166 (setq result
(dom-parent elem node
))))
169 (defun dom-previous-sibling (dom node
)
170 "Return the previous sibling of NODE in DOM."
171 (when-let* ((parent (dom-parent dom node
)))
172 (let ((siblings (dom-children parent
))
175 (when (eq (cadr siblings
) node
)
176 (setq previous
(car siblings
)))
180 (defun dom-node (tag &optional attributes
&rest children
)
181 "Return a DOM node with TAG and ATTRIBUTES."
183 `(,tag
,attributes
,@children
)
184 (list tag attributes
)))
186 (defun dom-append-child (node child
)
187 "Append CHILD to the end of NODE's children."
188 (setq node
(dom-ensure-node node
))
189 (nconc node
(list child
)))
191 (defun dom-add-child-before (node child
&optional before
)
192 "Add CHILD to NODE's children before child BEFORE.
193 If BEFORE is nil, make CHILD NODE's first child."
194 (setq node
(dom-ensure-node node
))
195 (let ((children (dom-children node
)))
197 (not (memq before children
)))
198 (error "%s does not exist as a child" before
))
199 (let ((pos (if before
200 (cl-position before children
)
204 (setcdr (cdr node
) (cons child
(cddr node
)))
205 (setcdr (nthcdr (1- pos
) children
)
206 (cons child
(nthcdr pos children
))))))
209 (defun dom-ensure-node (node)
210 "Ensure that NODE is a proper DOM node."
211 ;; Add empty attributes, if none.
212 (when (consp (car node
))
213 (setq node
(car node
)))
214 (when (= (length node
) 1)
215 (setcdr node
(list nil
)))
218 (defun dom-pp (dom &optional remove-empty
)
219 "Pretty-print DOM at point.
220 If REMOVE-EMPTY, ignore textual nodes that contain just
222 (let ((column (current-column)))
223 (insert (format "(%S " (dom-tag dom
)))
224 (let* ((attr (dom-attributes dom
))
225 (times (length attr
))
226 (column (1+ (current-column))))
231 (insert (format "(%S . %S)" (car elem
) (cdr elem
)))
232 (if (zerop (cl-decf times
))
234 (insert "\n" (make-string column ?
))))))
235 (let* ((children (if remove-empty
239 (string-match "\\`[\n\r\t ]*\\'" child
)))
242 (times (length children
)))
245 (insert "\n" (make-string (1+ column
) ?
))
246 (dolist (child children
)
248 (if (or (not remove-empty
)
249 (not (string-match "\\`[\n\r\t ]*\\'" child
)))
250 (insert (format "%S" child
)))
251 (dom-pp child remove-empty
))
252 (if (zerop (cl-decf times
))
254 (insert "\n" (make-string (1+ column
) ?
))))))))