DOM: Allocate all node strings when doing incremental rendering
[elinks.git] / src / dom / node.h
blob32948ea15aa4012c20612852202ce03bf67c7a86
2 #ifndef EL_DOM_NODE_H
3 #define EL_DOM_NODE_H
5 #include "dom/string.h"
6 #include "util/hash.h"
8 struct dom_node_list;
/* Identifies the kind of a DOM node. DOM_NODE_UNKNOWN is zero and the named
 * node kinds are numbered 1 through 12.
 * NOTE(review): the numbering appears to follow the W3C DOM nodeType
 * constants -- confirm before renumbering. */
10 enum dom_node_type {
11 DOM_NODE_UNKNOWN = 0, /* for internal use only */
13 DOM_NODE_ELEMENT = 1,
14 DOM_NODE_ATTRIBUTE = 2,
15 DOM_NODE_TEXT = 3,
16 DOM_NODE_CDATA_SECTION = 4,
17 DOM_NODE_ENTITY_REFERENCE = 5,
18 DOM_NODE_ENTITY = 6,
19 DOM_NODE_PROCESSING_INSTRUCTION = 7,
20 DOM_NODE_COMMENT = 8,
21 DOM_NODE_DOCUMENT = 9,
22 DOM_NODE_DOCUMENT_TYPE = 10,
23 DOM_NODE_DOCUMENT_FRAGMENT = 11,
24 DOM_NODE_NOTATION = 12,
/* Not a node type: it follows the highest value above and therefore equals
 * the number of values in this enum. Keep it last. */
26 DOM_NODES
29 /* The following are the node-specific data structures. They may contain no more
30 * than 3 pointers or something equivalent. */
/* Pairs a node with the attribute node that carries its id. The document
 * node's @element_ids hash is annotated as holding items of this type. */
32 struct dom_node_id_item {
33 /* The attribute node containing the id value */
34 struct dom_node *id_attribute;
36 /* The node with the @id attribute */
37 struct dom_node *node;
/* Node data used when the node type is DOM_NODE_DOCUMENT. */
40 struct dom_document_node {
41 /* The document URI is stored in the string / length members. */
42 /* An id to node hash for fast lookup. */
43 struct hash *element_ids; /* -> {struct dom_node_id_item} */
45 /* Any meta data the root node carries such as document type nodes,
46 * entity and notation map nodes and maybe some internal CSS stylesheet
47 * node. */
48 struct dom_node_list *meta_nodes;
50 /* The child nodes. May be NULL. Ordered like they were inserted. */
51 struct dom_node_list *children;
/* A public / system identifier pair. It is the node data of notation and
 * entity nodes and describes the external subset of a document type. */
54 struct dom_id {
55 struct dom_string public_id;
56 struct dom_string system_id;
/* Describes the internal and external subset of a document type. */
59 struct dom_doctype_subset_info {
/* The string of the internal subset */
60 struct dom_string internal;
/* The ids of the external subset */
61 struct dom_id external;
/* Node data used when the node type is DOM_NODE_DOCUMENT_TYPE. */
struct dom_document_type_node {
	/* These are really maps and should be sorted alphabetically. */
	struct dom_node_list *entities;
	struct dom_node_list *notations;

	/* The string/length members of dom_node hold the name of the document
	 * type "<!DOCTYPE {name} ...>". This holds the ids for the external
	 * subset and the string of the internal subset.
	 *
	 * The tag must be spelled exactly {struct dom_doctype_subset_info};
	 * a misspelled tag silently declares an unrelated incomplete type and
	 * makes the member impossible to dereference. */
	struct dom_doctype_subset_info *subset;
};
75 /* Element nodes are indexed nodes stored in node lists of either
76 * other child nodes or the root node. */
77 struct dom_element_node {
78 /* The child nodes. May be NULL. Ordered like they were inserted. */
79 struct dom_node_list *children;
81 /* Only element nodes can have attributes and element nodes can only be
82 * child nodes so the map is put here.
84 * The @map may be NULL if there are none. The @map nodes are sorted
85 * alphabetically according to the attribute's name so it has fast
86 * lookup. */
87 struct dom_node_list *map;
89 /* For <xsl:stylesheet ...> elements this holds the offset of
90 * 'stylesheet' */
91 uint16_t namespace_offset;
93 /* Special implementation dependent type specifier, for example
94 * containing an enum value representing the element to reduce string
95 * comparing and only do one fast find mapping. */
96 uint16_t type;
99 /* Attribute nodes are named nodes stored in a node map of an element node. */
100 struct dom_attribute_node {
101 /* The string that holds the attribute value. The @string / @length
102 * members of {struct dom_node} hold the name that identifies the node
103 * in the map. */
104 struct dom_string value;
106 /* For xml:lang="en" attributes this holds the offset of 'lang' */
107 uint16_t namespace_offset;
109 /* Special implementation dependent type specifier. For HTML it (will)
110 * contain an enum value representing the attribute HTML_CLASS, HTML_ID etc.
111 * to reduce string comparing and only do one fast find mapping. */
112 uint16_t type;
114 /* Was the attribute specified in the DTD as a default attribute or was
115 * it added from the document source. */
116 unsigned int specified:1;
118 /* Has the node->string been converted to internal charset. */
119 unsigned int converted:1;
121 /* Is the attribute a unique identifier meaning the owner (element)
122 * should be added to the document node's @element_ids hash. */
123 unsigned int id:1;
125 /* The attribute value references some other resource */
126 unsigned int reference:1;
128 /* The attribute value is delimited by quotes */
129 unsigned int quoted:1;
/* Node data of text nodes. The text itself is the @string member of the
 * owning {struct dom_node}. */
132 struct dom_text_node {
133 /* The number of newlines the text string contains */
134 unsigned int newlines;
136 /* We will need to add text nodes even if they contain only whitespace.
137 * In order to quickly identify such nodes this member is used. */
138 unsigned int only_space:1;
140 /* Has the node->string been converted to internal charset. */
141 unsigned int converted:1;
/* Target types of processing instructions; stored in the @type member of
 * {struct dom_proc_instruction_node}. */
144 enum dom_proc_instruction_type {
/* NOTE(review): presumably the type of any target without a dedicated
 * value below -- confirm against the users of this enum. */
145 DOM_PROC_INSTRUCTION,
147 /* Keep this group sorted */
148 DOM_PROC_INSTRUCTION_XML, /* XML header */
149 DOM_PROC_INSTRUCTION_XML_STYLESHEET, /* XML stylesheet link */
/* Not a type: equals the number of types above. Keep it last. */
151 DOM_PROC_INSTRUCTION_TYPES
/* Node data used when the node type is DOM_NODE_PROCESSING_INSTRUCTION. */
154 struct dom_proc_instruction_node {
155 /* The target of the processing instruction (xml for '<?xml ... ?>')
156 * is in the @string / @length members. */
157 /* This holds the value to be processed */
158 struct dom_string instruction;
160 /* For fast checking of the target type */
161 uint16_t type; /* enum dom_proc_instruction_type */
163 /* For some processing instructions like xml the instructions contain
164 * attributes and those attributes can be collected in this @map. */
165 struct dom_node_list *map;
/* Type-specific data of a node. Which member is valid depends on the @type
 * of the owning {struct dom_node}. */
168 union dom_node_data {
169 struct dom_document_node document;
170 struct dom_document_type_node document_type;
171 struct dom_element_node element;
172 struct dom_attribute_node attribute;
173 struct dom_text_node text;
174 struct dom_id notation;
175 /* For entities string/length hold the notation name */
176 struct dom_id entity;
177 struct dom_proc_instruction_node proc_instruction;
179 /* Node types without a union member yet:
181 * DOM_NODE_CDATA_SECTION,
182 * DOM_NODE_COMMENT,
183 * DOM_NODE_DOCUMENT_FRAGMENT,
184 * DOM_NODE_ENTITY_REFERENCE,
 */
188 /* This structure is size critical so keep ordering to make it easier to pack
189 * and avoid unneeded members. */
190 struct dom_node {
191 /* The type of the node */
192 uint16_t type; /* -> enum dom_node_type */
194 /* Was the node string allocated? add_dom_attribute() and
 * add_dom_proc_instruction() also consult this flag to decide between
 * init_dom_string() and copy_dom_string() for their value strings. */
195 unsigned int allocated:1;
197 /* Can contain either stuff like element name or for attributes the
198 * attribute name. */
199 struct dom_string string;
/* The node this node was added to. init_dom_node() creates nodes with a
 * NULL parent. */
201 struct dom_node *parent;
203 /* Various info depending on the type of the node. */
204 union dom_node_data data;
207 /* A node list can be used for storing indexed nodes */
208 struct dom_node_list {
/* The number of slots in @entries that the iteration macros visit. */
209 size_t size;
/* The nodes; slots may be NULL. Declared with a single element although the
 * iteration macros index it from 0 to @size - 1.
 * NOTE(review): presumably the list is over-allocated to hold @size
 * entries -- confirm against add_to_dom_node_list(). */
210 struct dom_node *entries[1];
/* Iterates forward over @list, assigning each non-NULL entry to @node with
 * @i as the running index; NULL entries are skipped by the trailing `if'.
 * @list is dereferenced without a check, so it must not be NULL, and it is
 * evaluated more than once. Because the expansion ends in an `if', an `else'
 * written directly after the loop body would bind to that `if'. */
213 #define foreach_dom_node(list, node, i) \
214 for ((i) = 0; (i) < (list)->size; (i)++) \
215 if (((node) = (list)->entries[(i)]))
/* Iterates backward over @list, from the last entry down to and including
 * entries[0], assigning each non-NULL entry to @node with @i as the index;
 * NULL entries are skipped by the trailing `if'.
 *
 * The counter starts at the list size and is tested before it is decremented.
 * That way index 0 is visited, and an empty list yields no iterations whether
 * @i is signed or unsigned. After the loop @i has been decremented past zero.
 *
 * @list is dereferenced without a check, so it must not be NULL, and it is
 * evaluated more than once. */
#define foreachback_dom_node(list, node, i)			\
	for ((i) = (list)->size; (i)-- > 0; )			\
		if (((node) = (list)->entries[(i)]))
/* Evaluates to non-zero when @list is non-NULL and @member is an index in
 * the range 0 .. @list->size - 1. Both arguments are evaluated more than
 * once. */
221 #define is_dom_node_list_member(list, member) \
222 ((list) && 0 <= (member) && (member) < (list)->size)
224 /* Adds @node to the list pointed to by @list_ptr at the given @position. If
225 * @position is -1 the node is added at the end. */
226 struct dom_node_list *
227 add_to_dom_node_list(struct dom_node_list **list_ptr,
228 struct dom_node *node, int position);
/* NOTE(review): presumably releases @list; whether the entries themselves
 * are released too is not visible from this header -- confirm. */
230 void done_dom_node_list(struct dom_node_list *list);
232 /* Returns the position or index where the @node has been inserted into the
233 * 'default' list of the @parent node. (Default means use get_dom_node_list()
234 * to acquire the list to search in.) Returns -1 if the node is not found. */
235 int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);
237 /* Returns the position or index where the @node should be inserted into the
238 * node @list in order for the list to stay alphabetically sorted. Assumes that
239 * @list is already sorted properly. */
240 int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
242 /* Returns the previous sibling to the node. */
243 struct dom_node *get_dom_node_prev(struct dom_node *node);
245 /* Returns a child of @node selected by @child_type and @child_subtype, or
 * NULL. NOTE(review): the comment used to read "Returns first text node of
 * the element or NULL", which does not match the type / subtype parameters
 * -- confirm the exact matching rules against the implementation. */
246 struct dom_node *
247 get_dom_node_child(struct dom_node *node, enum dom_node_type child_type,
248 int16_t child_subtype);
250 /* Looks up the @node_map for a node matching the requested type and name.
251 * The @subtype may be 0, indicating an unknown subtype, in which case only the
252 * name should be tested; else it will indicate either the element or attribute
253 * private subtype. */
254 struct dom_node *
255 get_dom_node_map_entry(struct dom_node_list *node_map,
256 enum dom_node_type type, uint16_t subtype,
257 struct dom_string *name);
259 /* Removes the node and all its children and free()s itself */
260 void done_dom_node(struct dom_node *node);
262 /* The allocated argument is used as the value of node->allocated if >= 0.
263 * Use -1 to default node->allocated to the value of parent->allocated.
 * @file and @line receive __FILE__ and __LINE__ from the wrapper macros
 * below. */
264 struct dom_node *
265 init_dom_node_(unsigned char *file, int line,
266 struct dom_node *parent, enum dom_node_type type,
267 struct dom_string *string, int allocated);
/* Creates a node without a parent; @allocated is passed on as given. */
269 #define init_dom_node(type, string, allocated) \
270 init_dom_node_(__FILE__, __LINE__, NULL, type, string, allocated)
/* Creates a node under @parent. Passes -1 for the allocated argument, so
 * node->allocated defaults to the value of parent->allocated. */
272 #define add_dom_node(parent, type, string) \
273 init_dom_node_(__FILE__, __LINE__, parent, type, string, -1)
/* Shorthand for add_dom_node() with DOM_NODE_ELEMENT as the type. */
275 #define add_dom_element(parent, string) \
276 add_dom_node(parent, DOM_NODE_ELEMENT, string)
278 static inline struct dom_node *
279 add_dom_attribute(struct dom_node *parent, struct dom_string *name,
280 struct dom_string *value)
282 struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
284 if (node && value) {
285 struct dom_string *str = &node->data.attribute.value;
287 if (node->allocated) {
288 if (!init_dom_string(str, value->string, value->length)) {
289 done_dom_node(node);
290 return NULL;
292 } else {
293 copy_dom_string(str, value);
297 return node;
300 static inline struct dom_node *
301 add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
302 struct dom_string *instruction)
304 struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
306 if (node && instruction) {
307 struct dom_string *str = &node->data.proc_instruction.instruction;
309 if (node->allocated) {
310 if (!init_dom_string(str, instruction->string, instruction->length)) {
311 done_dom_node(node);
312 return NULL;
314 } else {
315 copy_dom_string(str, instruction);
319 return node;
322 /* Compare two nodes returning non-zero if they differ. */
323 int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);
325 /* Returns the name of the node in an allocated string.
 * NOTE(review): "allocated" suggests the caller must release the result --
 * confirm against the implementation. */
326 struct dom_string *get_dom_node_name(struct dom_node *node);
328 /* Returns the value of the node or NULL if no value is defined for the node
329 * type. */
330 struct dom_string *get_dom_node_value(struct dom_node *node);
332 /* Returns the name used for identifying the node type. */
333 struct dom_string *get_dom_node_type_name(enum dom_node_type type);
335 /* Based on the type of the parent and the node type return a proper list
336 * or NULL. This is useful when adding a node to a parent node. */
337 static inline struct dom_node_list **
338 get_dom_node_list_by_type(struct dom_node *parent, enum dom_node_type type)
340 switch (parent->type) {
341 case DOM_NODE_DOCUMENT:
342 return &parent->data.document.children;
344 case DOM_NODE_ELEMENT:
345 switch (type) {
346 case DOM_NODE_ATTRIBUTE:
347 return &parent->data.element.map;
349 default:
350 return &parent->data.element.children;
353 case DOM_NODE_DOCUMENT_TYPE:
354 switch (type) {
355 case DOM_NODE_ENTITY:
356 return &parent->data.document_type.entities;
358 case DOM_NODE_NOTATION:
359 return &parent->data.document_type.notations;
361 default:
362 return NULL;
365 case DOM_NODE_PROCESSING_INSTRUCTION:
366 switch (type) {
367 case DOM_NODE_ATTRIBUTE:
368 return &parent->data.proc_instruction.map;
370 default:
371 return NULL;
374 default:
375 return NULL;
/* Wrapper around get_dom_node_list_by_type() that takes the type from @node
 * itself. @node is dereferenced, so it must not be NULL. */
379 #define get_dom_node_list(parent, node) \
380 get_dom_node_list_by_type(parent, (node)->type)
382 #endif