1 ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions
3 ;; Copyright (C) 2014 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 (defsubst dom-tag
(node)
30 "Return the NODE tag."
31 ;; Called on a list of nodes. Use the first.
32 (if (consp (car node
))
36 (defsubst dom-attributes
(node)
37 "Return the NODE attributes."
38 ;; Called on a list of nodes. Use the first.
39 (if (consp (car node
))
43 (defsubst dom-children
(node)
44 "Return the NODE children."
45 ;; Called on a list of nodes. Use the first.
46 (if (consp (car node
))
50 (defun dom-non-text-children (node)
51 "Return all non-text-node children of NODE."
52 (cl-loop for child in
(dom-children node
)
53 unless
(stringp child
)
(defun dom-set-attributes (node attributes)
  "Replace the attribute list of NODE with ATTRIBUTES.
NODE is first passed through `dom-ensure-node'; the second
element of the resulting list is then overwritten destructively.
The value of the `setcar' call, i.e. ATTRIBUTES, is returned."
  ;; The attribute alist lives in the second cell of the node list.
  (setcar (cdr (dom-ensure-node node)) attributes))
61 (defun dom-set-attribute (node attribute value
)
62 "Set ATTRIBUTE in NODE to VALUE."
63 (setq node
(dom-ensure-node node
))
64 (let ((old (assoc attribute
(cadr node
))))
67 (setcar (cdr node
) (nconc (cadr node
) (list (cons attribute value
)))))))
(defmacro dom-attr (node attr)
  "Look up the attribute ATTR in NODE and return its value.
A typical attribute is `href'.  ATTR is located with `assq' in
the list returned by `dom-attributes'; the result is nil when no
such entry exists."
  ;; Expands to: (cdr (assq ATTR (dom-attributes NODE)))
  (list 'cdr
        (list 'assq attr
              (list 'dom-attributes node))))
(defun dom-text (node)
  "Return the text children of NODE joined into a single string.
Only those elements of the list returned by `dom-children' that
are strings are used.  They are concatenated in their original
order, separated by a single space."
  (let (strings)
    ;; Collect the string children; `push' builds the list reversed.
    (dolist (child (dom-children node))
      (when (stringp child)
        (push child strings)))
    (mapconcat #'identity (nreverse strings) " ")))
78 (defun dom-texts (node &optional separator
)
79 "Return all textual data under NODE concatenated with SEPARATOR in-between."
86 (dom-texts elem separator
)))
(defun dom-child-by-tag (dom tag)
  "Return the first child of DOM whose tag is `equal' to TAG.
Children that are not conses (for instance text strings) are
skipped.  Return nil when no child matches."
  (cl-loop for child in (dom-children dom)
           ;; A child's tag is the car of its list representation.
           when (and (consp child)
                     (equal tag (car child)))
           return child))
94 (defun dom-by-tag (dom tag
)
95 "Return elements in DOM that is of type TAG.
96 A name is a symbol like `td'."
97 (let ((matches (cl-loop for child in
(dom-children dom
)
98 for matches
= (and (not (stringp child
))
99 (dom-by-tag child tag
))
102 (if (equal (dom-tag dom
) tag
)
(defun dom-by-class (dom match)
  "Return the elements in DOM whose class name matches regexp MATCH.
This is `dom-elements' applied to the `class' attribute."
  (let ((attribute 'class))
    (dom-elements dom attribute match)))
(defun dom-by-style (dom match)
  "Return the elements in DOM whose style matches regexp MATCH.
This is `dom-elements' applied to the `style' attribute."
  (let ((attribute 'style))
    (dom-elements dom attribute match)))
(defun dom-by-id (dom match)
  "Return the elements in DOM whose ID matches regexp MATCH.
This is `dom-elements' applied to the `id' attribute.  Note that
MATCH is treated as a regexp, not as a literal identifier."
  (let ((attribute 'id))
    (dom-elements dom attribute match)))
118 (defun dom-elements (dom attribute match
)
119 "Find elements matching MATCH (a regexp) in ATTRIBUTE.
120 ATTRIBUTE would typically be `class', `id' or the like."
121 (let ((matches (cl-loop for child in
(dom-children dom
)
122 for matches
= (and (not (stringp child
))
123 (dom-elements child attribute
127 (attr (dom-attr dom attribute
)))
129 (string-match match attr
))
133 (defun dom-parent (dom node
)
134 "Return the parent of NODE in DOM."
135 (if (memq node
(dom-children dom
))
138 (dolist (elem (dom-children dom
))
139 (when (and (not result
)
140 (not (stringp elem
)))
141 (setq result
(dom-parent elem node
))))
144 (defun dom-node (tag &optional attributes
&rest children
)
145 "Return a DOM node with TAG and ATTRIBUTES."
147 `(,tag
,attributes
,@children
)
148 (list tag attributes
)))
(defun dom-append-child (node child)
  "Add CHILD as the last child of NODE.
NODE is first passed through `dom-ensure-node'; CHILD is then
attached destructively to the end of the resulting list with
`nconc', whose value is returned."
  (nconc (dom-ensure-node node)
         (list child)))
155 (defun dom-add-child-before (node child
&optional before
)
156 "Add CHILD to NODE's children before child BEFORE.
157 If BEFORE is nil, make CHILD NODE's first child."
158 (setq node
(dom-ensure-node node
))
159 (let ((children (dom-children node
)))
161 (not (memq before children
)))
162 (error "%s does not exist as a child" before
))
163 (let ((pos (if before
164 (cl-position before children
)
168 (setcdr (cdr node
) (cons child
(cddr node
)))
169 (setcdr (nthcdr (1- pos
) children
)
170 (cons child
(nthcdr pos children
))))))
173 (defun dom-ensure-node (node)
174 "Ensure that NODE is a proper DOM node."
175 ;; Add empty attributes, if none.
176 (when (consp (car node
))
177 (setq node
(car node
)))
178 (when (= (length node
) 1)
179 (setcdr node
(list nil
)))
182 (defun dom-pp (dom &optional remove-empty
)
183 "Pretty-print DOM at point.
184 If REMOVE-EMPTY, ignore textual nodes that contain just
186 (let ((column (current-column)))
187 (insert (format "(%S " (dom-tag dom
)))
188 (let* ((attr (dom-attributes dom
))
189 (times (length attr
))
190 (column (1+ (current-column))))
195 (insert (format "(%S . %S)" (car elem
) (cdr elem
)))
196 (if (zerop (cl-decf times
))
198 (insert "\n" (make-string column ?
))))))
199 (let* ((children (if remove-empty
203 (string-match "\\`[\n\r\t ]*\\'" child
)))
206 (times (length children
)))
209 (insert "\n" (make-string (1+ column
) ?
))
210 (dolist (child children
)
212 (if (or (not remove-empty
)
213 (not (string-match "\\`[\n\r\t ]*\\'" child
)))
214 (insert (format "%S" child
)))
215 (dom-pp child remove-empty
))
216 (if (zerop (cl-decf times
))
218 (insert "\n" (make-string (1+ column
) ?
))))))))