(gnus-blocked-images): Clarify privacy implications
[emacs.git] / lisp / dom.el
blob6045a68d14c2ef11ff8585f8eca89e30b385baba
1 ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions -*- lexical-binding: t -*-
3 ;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; Keywords: xml, html
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;;; Code:
27 (require 'cl-lib)
28 (eval-when-compile (require 'subr-x))
30 (defsubst dom-tag (node)
31 "Return the NODE tag."
32 ;; Called on a list of nodes. Use the first.
33 (if (consp (car node))
34 (caar node)
35 (car node)))
37 (defsubst dom-attributes (node)
38 "Return the NODE attributes."
39 ;; Called on a list of nodes. Use the first.
40 (if (consp (car node))
41 (cadr (car node))
42 (cadr node)))
44 (defsubst dom-children (node)
45 "Return the NODE children."
46 ;; Called on a list of nodes. Use the first.
47 (if (consp (car node))
48 (cddr (car node))
49 (cddr node)))
51 (defun dom-non-text-children (node)
52 "Return all non-text-node children of NODE."
53 (cl-loop for child in (dom-children node)
54 unless (stringp child)
55 collect child))
57 (defun dom-set-attributes (node attributes)
58 "Set the attributes of NODE to ATTRIBUTES."
59 (setq node (dom-ensure-node node))
60 (setcar (cdr node) attributes))
62 (defun dom-set-attribute (node attribute value)
63 "Set ATTRIBUTE in NODE to VALUE."
64 (setq node (dom-ensure-node node))
65 (let ((old (assoc attribute (cadr node))))
66 (if old
67 (setcdr old value)
68 (setcar (cdr node) (nconc (cadr node) (list (cons attribute value)))))))
70 (defmacro dom-attr (node attr)
71 "Return the attribute ATTR from NODE.
72 A typical attribute is `href'."
73 `(cdr (assq ,attr (dom-attributes ,node))))
75 (defun dom-text (node)
76 "Return all the text bits in the current node concatenated."
77 (mapconcat 'identity (cl-remove-if-not 'stringp (dom-children node)) " "))
79 (defun dom-texts (node &optional separator)
80 "Return all textual data under NODE concatenated with SEPARATOR in-between."
81 (if (eq (dom-tag node) 'script)
83 (mapconcat
84 'identity
85 (mapcar
86 (lambda (elem)
87 (cond
88 ((stringp elem)
89 elem)
90 ((eq (dom-tag elem) 'script)
91 "")
93 (dom-texts elem separator))))
94 (dom-children node))
95 (or separator " "))))
97 (defun dom-child-by-tag (dom tag)
98 "Return the first child of DOM that is of type TAG."
99 (assoc tag (dom-children dom)))
101 (defun dom-by-tag (dom tag)
102 "Return elements in DOM that is of type TAG.
103 A name is a symbol like `td'."
104 (let ((matches (cl-loop for child in (dom-children dom)
105 for matches = (and (not (stringp child))
106 (dom-by-tag child tag))
107 when matches
108 append matches)))
109 (if (equal (dom-tag dom) tag)
110 (cons dom matches)
111 matches)))
113 (defun dom-strings (dom)
114 "Return elements in DOM that are strings."
115 (cl-loop for child in (dom-children dom)
116 if (stringp child)
117 collect child
118 else
119 append (dom-strings child)))
121 (defun dom-by-class (dom match)
122 "Return elements in DOM that have a class name that matches regexp MATCH."
123 (dom-elements dom 'class match))
125 (defun dom-by-style (dom match)
126 "Return elements in DOM that have a style that matches regexp MATCH."
127 (dom-elements dom 'style match))
129 (defun dom-by-id (dom match)
130 "Return elements in DOM that have an ID that matches regexp MATCH."
131 (dom-elements dom 'id match))
133 (defun dom-elements (dom attribute match)
134 "Find elements matching MATCH (a regexp) in ATTRIBUTE.
135 ATTRIBUTE would typically be `class', `id' or the like."
136 (let ((matches (cl-loop for child in (dom-children dom)
137 for matches = (and (not (stringp child))
138 (dom-elements child attribute
139 match))
140 when matches
141 append matches))
142 (attr (dom-attr dom attribute)))
143 (if (and attr
144 (string-match match attr))
145 (cons dom matches)
146 matches)))
148 (defun dom-remove-node (dom node)
149 "Remove NODE from DOM."
150 ;; If we're removing the top level node, just return nil.
151 (dolist (child (dom-children dom))
152 (cond
153 ((eq node child)
154 (delq node dom))
155 ((not (stringp child))
156 (dom-remove-node child node)))))
158 (defun dom-parent (dom node)
159 "Return the parent of NODE in DOM."
160 (if (memq node (dom-children dom))
162 (let ((result nil))
163 (dolist (elem (dom-children dom))
164 (when (and (not result)
165 (not (stringp elem)))
166 (setq result (dom-parent elem node))))
167 result)))
169 (defun dom-previous-sibling (dom node)
170 "Return the previous sibling of NODE in DOM."
171 (when-let* ((parent (dom-parent dom node)))
172 (let ((siblings (dom-children parent))
173 (previous nil))
174 (while siblings
175 (when (eq (cadr siblings) node)
176 (setq previous (car siblings)))
177 (pop siblings))
178 previous)))
180 (defun dom-node (tag &optional attributes &rest children)
181 "Return a DOM node with TAG and ATTRIBUTES."
182 (if children
183 `(,tag ,attributes ,@children)
184 (list tag attributes)))
186 (defun dom-append-child (node child)
187 "Append CHILD to the end of NODE's children."
188 (setq node (dom-ensure-node node))
189 (nconc node (list child)))
191 (defun dom-add-child-before (node child &optional before)
192 "Add CHILD to NODE's children before child BEFORE.
193 If BEFORE is nil, make CHILD NODE's first child."
194 (setq node (dom-ensure-node node))
195 (let ((children (dom-children node)))
196 (when (and before
197 (not (memq before children)))
198 (error "%s does not exist as a child" before))
199 (let ((pos (if before
200 (cl-position before children)
201 0)))
202 (if (zerop pos)
203 ;; First child.
204 (setcdr (cdr node) (cons child (cddr node)))
205 (setcdr (nthcdr (1- pos) children)
206 (cons child (nthcdr pos children))))))
207 node)
209 (defun dom-ensure-node (node)
210 "Ensure that NODE is a proper DOM node."
211 ;; Add empty attributes, if none.
212 (when (consp (car node))
213 (setq node (car node)))
214 (when (= (length node) 1)
215 (setcdr node (list nil)))
216 node)
218 (defun dom-pp (dom &optional remove-empty)
219 "Pretty-print DOM at point.
220 If REMOVE-EMPTY, ignore textual nodes that contain just
221 white-space."
222 (let ((column (current-column)))
223 (insert (format "(%S " (dom-tag dom)))
224 (let* ((attr (dom-attributes dom))
225 (times (length attr))
226 (column (1+ (current-column))))
227 (if (null attr)
228 (insert "nil")
229 (insert "(")
230 (dolist (elem attr)
231 (insert (format "(%S . %S)" (car elem) (cdr elem)))
232 (if (zerop (cl-decf times))
233 (insert ")")
234 (insert "\n" (make-string column ? ))))))
235 (let* ((children (if remove-empty
236 (cl-remove-if
237 (lambda (child)
238 (and (stringp child)
239 (string-match "\\`[\n\r\t  ]*\\'" child)))
240 (dom-children dom))
241 (dom-children dom)))
242 (times (length children)))
243 (if (null children)
244 (insert ")")
245 (insert "\n" (make-string (1+ column) ? ))
246 (dolist (child children)
247 (if (stringp child)
248 (if (or (not remove-empty)
249 (not (string-match "\\`[\n\r\t  ]*\\'" child)))
250 (insert (format "%S" child)))
251 (dom-pp child remove-empty))
252 (if (zerop (cl-decf times))
253 (insert ")")
254 (insert "\n" (make-string (1+ column) ? ))))))))
256 (provide 'dom)
258 ;;; dom.el ends here