1 ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions -*- lexical-binding: t -*-
3 ;; Copyright (C) 2014-2017 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 (eval-when-compile (require 'subr-x
))
30 (defsubst dom-tag
(node)
31 "Return the NODE tag."
32 ;; Called on a list of nodes. Use the first.
33 (if (consp (car node
))
37 (defsubst dom-attributes
(node)
38 "Return the NODE attributes."
39 ;; Called on a list of nodes. Use the first.
40 (if (consp (car node
))
44 (defsubst dom-children
(node)
45 "Return the NODE children."
46 ;; Called on a list of nodes. Use the first.
47 (if (consp (car node
))
51 (defun dom-non-text-children (node)
52 "Return all non-text-node children of NODE."
53 (cl-loop for child in
(dom-children node
)
54 unless
(stringp child
)
57 (defun dom-set-attributes (node attributes
)
58 "Set the attributes of NODE to ATTRIBUTES."
59 (setq node
(dom-ensure-node node
))
60 (setcar (cdr node
) attributes
))
62 (defun dom-set-attribute (node attribute value
)
63 "Set ATTRIBUTE in NODE to VALUE."
64 (setq node
(dom-ensure-node node
))
65 (let ((old (assoc attribute
(cadr node
))))
68 (setcar (cdr node
) (nconc (cadr node
) (list (cons attribute value
)))))))
70 (defmacro dom-attr
(node attr
)
71 "Return the attribute ATTR from NODE.
72 A typical attribute is `href'."
73 `(cdr (assq ,attr
(dom-attributes ,node
))))
75 (defun dom-text (node)
76 "Return all the text bits in the current node concatenated."
77 (mapconcat 'identity
(cl-remove-if-not 'stringp
(dom-children node
)) " "))
79 (defun dom-texts (node &optional separator
)
80 "Return all textual data under NODE concatenated with SEPARATOR in-between."
87 (dom-texts elem separator
)))
91 (defun dom-child-by-tag (dom tag
)
92 "Return the first child of DOM that is of type TAG."
93 (assoc tag
(dom-children dom
)))
95 (defun dom-by-tag (dom tag
)
96 "Return elements in DOM that is of type TAG.
97 A name is a symbol like `td'."
98 (let ((matches (cl-loop for child in
(dom-children dom
)
99 for matches
= (and (not (stringp child
))
100 (dom-by-tag child tag
))
103 (if (equal (dom-tag dom
) tag
)
107 (defun dom-strings (dom)
108 "Return elements in DOM that are strings."
109 (cl-loop for child in
(dom-children dom
)
113 append
(dom-strings child
)))
115 (defun dom-by-class (dom match
)
116 "Return elements in DOM that have a class name that matches regexp MATCH."
117 (dom-elements dom
'class match
))
119 (defun dom-by-style (dom match
)
120 "Return elements in DOM that have a style that matches regexp MATCH."
121 (dom-elements dom
'style match
))
123 (defun dom-by-id (dom match
)
124 "Return elements in DOM that have an ID that matches regexp MATCH."
125 (dom-elements dom
'id match
))
127 (defun dom-elements (dom attribute match
)
128 "Find elements matching MATCH (a regexp) in ATTRIBUTE.
129 ATTRIBUTE would typically be `class', `id' or the like."
130 (let ((matches (cl-loop for child in
(dom-children dom
)
131 for matches
= (and (not (stringp child
))
132 (dom-elements child attribute
136 (attr (dom-attr dom attribute
)))
138 (string-match match attr
))
142 (defun dom-remove-node (dom node
)
143 "Remove NODE from DOM."
144 ;; If we're removing the top level node, just return nil.
145 (dolist (child (dom-children dom
))
149 ((not (stringp child
))
150 (dom-remove-node child node
)))))
152 (defun dom-parent (dom node
)
153 "Return the parent of NODE in DOM."
154 (if (memq node
(dom-children dom
))
157 (dolist (elem (dom-children dom
))
158 (when (and (not result
)
159 (not (stringp elem
)))
160 (setq result
(dom-parent elem node
))))
163 (defun dom-previous-sibling (dom node
)
164 "Return the previous sibling of NODE in DOM."
165 (when-let (parent (dom-parent dom node
))
166 (let ((siblings (dom-children parent
))
169 (when (eq (cadr siblings
) node
)
170 (setq previous
(car siblings
)))
174 (defun dom-node (tag &optional attributes
&rest children
)
175 "Return a DOM node with TAG and ATTRIBUTES."
177 `(,tag
,attributes
,@children
)
178 (list tag attributes
)))
180 (defun dom-append-child (node child
)
181 "Append CHILD to the end of NODE's children."
182 (setq node
(dom-ensure-node node
))
183 (nconc node
(list child
)))
185 (defun dom-add-child-before (node child
&optional before
)
186 "Add CHILD to NODE's children before child BEFORE.
187 If BEFORE is nil, make CHILD NODE's first child."
188 (setq node
(dom-ensure-node node
))
189 (let ((children (dom-children node
)))
191 (not (memq before children
)))
192 (error "%s does not exist as a child" before
))
193 (let ((pos (if before
194 (cl-position before children
)
198 (setcdr (cdr node
) (cons child
(cddr node
)))
199 (setcdr (nthcdr (1- pos
) children
)
200 (cons child
(nthcdr pos children
))))))
203 (defun dom-ensure-node (node)
204 "Ensure that NODE is a proper DOM node."
205 ;; Add empty attributes, if none.
206 (when (consp (car node
))
207 (setq node
(car node
)))
208 (when (= (length node
) 1)
209 (setcdr node
(list nil
)))
212 (defun dom-pp (dom &optional remove-empty
)
213 "Pretty-print DOM at point.
214 If REMOVE-EMPTY, ignore textual nodes that contain just
216 (let ((column (current-column)))
217 (insert (format "(%S " (dom-tag dom
)))
218 (let* ((attr (dom-attributes dom
))
219 (times (length attr
))
220 (column (1+ (current-column))))
225 (insert (format "(%S . %S)" (car elem
) (cdr elem
)))
226 (if (zerop (cl-decf times
))
228 (insert "\n" (make-string column ?
))))))
229 (let* ((children (if remove-empty
233 (string-match "\\`[\n\r\t ]*\\'" child
)))
236 (times (length children
)))
239 (insert "\n" (make-string (1+ column
) ?
))
240 (dolist (child children
)
242 (if (or (not remove-empty
)
243 (not (string-match "\\`[\n\r\t ]*\\'" child
)))
244 (insert (format "%S" child
)))
245 (dom-pp child remove-empty
))
246 (if (zerop (cl-decf times
))
248 (insert "\n" (make-string (1+ column
) ?
))))))))