Add DOM_NODE_UNKNOWN node type for internal purposes only
[elinks.git] / src / document / dom / node.h
blob5f74263a73fac918270a4060a4743cc1e7384eca
2 #ifndef EL__DOCUMENT_DOM_NODE_H
3 #define EL__DOCUMENT_DOM_NODE_H
5 #include "document/dom/string.h"
6 #include "util/hash.h"
8 struct dom_node_list;
/* The type of a DOM node. The values 1 through 12 coincide with the nodeType
 * constants of the W3C DOM Node interface; 0 is reserved for internal use. */
enum dom_node_type {
	DOM_NODE_UNKNOWN		=  0, /* for internal purpose only */

	DOM_NODE_ELEMENT		=  1,
	DOM_NODE_ATTRIBUTE		=  2,
	DOM_NODE_TEXT			=  3,
	DOM_NODE_CDATA_SECTION		=  4,
	DOM_NODE_ENTITY_REFERENCE	=  5,
	DOM_NODE_ENTITY			=  6,
	DOM_NODE_PROCESSING_INSTRUCTION	=  7,
	DOM_NODE_COMMENT		=  8,
	DOM_NODE_DOCUMENT		=  9,
	DOM_NODE_DOCUMENT_TYPE		= 10,
	DOM_NODE_DOCUMENT_FRAGMENT	= 11,
	DOM_NODE_NOTATION		= 12,

	/* One past the last node type; must stay the final enumerator. */
	DOM_NODES
};
/* The following are the node-specific data structures. They may contain no
 * more than 3 pointers or something equivalent. */
/* Pairs a node with the attribute node that carries its id value. */
struct dom_node_id_item {
	/* The attribute node containing the id value */
	struct dom_node *id_attribute;

	/* The node with the @id attribute */
	struct dom_node *node;
};
/* Node specific data for document (root) nodes. */
struct dom_document_node {
	/* The document URI is stored in the string / length members. */

	/* An id to node hash for fast lookup. */
	struct hash *element_ids; /* -> {struct dom_node_id_item} */

	/* Any meta data the root node carries such as document type nodes,
	 * entity and notation map nodes and maybe some internal CSS stylesheet
	 * node. */
	struct dom_node_list *meta_nodes;

	/* The child nodes. May be NULL. Ordered like they were inserted. */
	struct dom_node_list *children;
};
54 struct dom_id {
55 struct dom_string public_id;
56 struct dom_string system_id;
59 struct dom_doctype_subset_info {
60 struct dom_string internal;
61 struct dom_id external;
/* Node specific data for document type ("<!DOCTYPE ...>") nodes. */
struct dom_document_type_node {
	/* These are really maps and should be sorted alphabetically. */
	struct dom_node_list *entities;
	struct dom_node_list *notations;

	/* The string/length members of dom_node hold the name of the document
	 * type "<!DOCTYPE {name} ...>". This holds the ids for the external
	 * subset and the string of the internal subset. */
	struct dom_doctype_subset_info *subset;
};
/* Element nodes are indexed nodes stored in node lists of either
 * other child nodes or the root node. */
struct dom_element_node {
	/* The child nodes. May be NULL. Ordered like they were inserted. */
	struct dom_node_list *children;

	/* Only element nodes can have attributes and element nodes can only be
	 * child nodes so the map is put here.
	 *
	 * The @map may be NULL if there are none. The @map nodes are sorted
	 * alphabetically according to the attributes name so it has fast
	 * lookup. */
	struct dom_node_list *map;

	/* For <xsl:stylesheet ...> elements this holds the offset of
	 * 'stylesheet' */
	uint16_t namespace_offset;

	/* Special implementation dependent type specifier for example
	 * containing an enum value representing the element to reduce string
	 * comparing and only do one fast find mapping. */
	uint16_t type;
};
99 /* Attribute nodes are named nodes stored in a node map of an element node. */
100 struct dom_attribute_node {
101 /* The string that hold the attribute value. The @string / @length
102 * members of {struct dom_node} holds the name that identifies the node
103 * in the map. */
104 struct dom_string value;
106 /* For xml:lang="en" attributes this holds the offset of 'lang' */
107 uint16_t namespace_offset;
109 /* Special implementation dependent type specifier. For HTML it (will)
110 * contain an enum value representing the attribute HTML_CLASS, HTML_ID etc.
111 * to reduce string comparing and only do one fast find mapping. */
112 uint16_t type;
114 /* Was the attribute specified in the DTD as a default attribute or was
115 * it added from the document source. */
116 unsigned int specified:1;
118 /* Was the node->string allocated */
119 unsigned int allocated:1;
121 /* Has the node->string been converted to internal charset. */
122 unsigned int converted:1;
124 /* Is the attribute a unique identifier meaning the owner (element)
125 * should be added to the document nodes @element_id hash. */
126 unsigned int id:1;
128 /* The attribute value references some other resource */
129 unsigned int reference:1;
131 /* The attribute value is delimited by quotes */
132 unsigned int quoted:1;
/* Node specific data for text nodes. */
struct dom_text_node {
	/* The number of newlines the text string contains */
	unsigned int newlines;

	/* We will need to add text nodes even if they contain only whitespace.
	 * In order to quickly identify such nodes this member is used. */
	unsigned int only_space:1;

	/* Was the node->string allocated */
	unsigned int allocated:1;

	/* Has the node->string been converted to internal charset. */
	unsigned int converted:1;
};
/* The known processing instruction targets. */
enum dom_proc_instruction_type {
	DOM_PROC_INSTRUCTION,

	/* Keep this group sorted */
	DOM_PROC_INSTRUCTION_DBHTML,	/* DocBook toolchain instruction */
	DOM_PROC_INSTRUCTION_ELINKS,	/* Internal instruction hook */
	DOM_PROC_INSTRUCTION_XML,	/* XML instructions */

	/* One past the last type; must stay the final enumerator. */
	DOM_PROC_INSTRUCTION_TYPES
};
161 struct dom_proc_instruction_node {
162 /* The target of the processing instruction (xml for '<?xml ... ?>')
163 * is in the @string / @length members. */
164 /* This holds the value to be processed */
165 struct dom_string instruction;
167 /* For fast checking of the target type */
168 uint16_t type; /* enum dom_proc_instruction_type */
170 /* For some processing instructions like xml the instructions contain
171 * attributes and those attribute can be collected in this @map. */
172 struct dom_node_list *map;
175 union dom_node_data {
176 struct dom_document_node document;
177 struct dom_document_type_node document_type;
178 struct dom_element_node element;
179 struct dom_attribute_node attribute;
180 struct dom_text_node text;
181 struct dom_id notation;
182 /* For entities string/length hold the notation name */
183 struct dom_id entity;
184 struct dom_proc_instruction_node proc_instruction;
186 /* Node types without a union member yet
188 * DOM_NODE_CDATA_SECTION,
189 * DOM_NODE_COMMENT,
190 * DOM_NODE_DOCUMENT_FRAGMENT,
191 * DOM_NODE_ENTITY_REFERENCE,
195 /* This structure is size critical so keep ordering to make it easier to pack
196 * and avoid unneeded members. */
197 struct dom_node {
198 /* The type of the node */
199 uint16_t type; /* -> enum dom_node_type */
201 /* Can contain either stuff like element name or for attributes the
202 * attribute name. */
203 struct dom_string string;
205 struct dom_node *parent;
207 /* Various info depending on the type of the node. */
208 union dom_node_data data;
/* A node list can be used for storing indexed nodes */
struct dom_node_list {
	/* The number of entries; the iteration macros use it as the upper
	 * bound for indexing @entries. */
	size_t size;

	/* Declared with a single slot.
	 * NOTE(review): presumably the list is over-allocated so that @size
	 * entries fit -- confirm against add_to_dom_node_list(). */
	struct dom_node *entries[1];
};
/* Iterates forward over @list, running the statement that follows once for
 * every non-NULL entry with @node set to the entry and @i to its index.
 * @list must not be NULL. Because the expansion ends in a bare `if', an `else'
 * placed directly after the loop body would bind to that `if'. */
#define foreach_dom_node(i, node, list) \
	for ((i) = 0; (i) < (list)->size; (i)++) \
		if (((node) = (list)->entries[(i)]))
/* Iterates backward over @list, running the statement that follows once for
 * every non-NULL entry with @node set to the entry and @i to its index.
 * @list must not be NULL.
 *
 * The index is decremented as part of the loop test so that entry 0 is
 * visited too and so that an empty list is skipped even when @i has an
 * unsigned type. After the loop has run to completion @i has been decremented
 * past zero (-1 for signed types). */
#define foreachback_dom_node(i, node, list) \
	for ((i) = (list)->size; (i)-- > 0; ) \
		if (((node) = (list)->entries[(i)]))
/* Evaluates to non-zero if @list is not NULL and @member is a valid index
 * into it, i.e. 0 <= @member < @list->size. Both arguments are evaluated more
 * than once. */
#define is_dom_node_list_member(list, member) \
	((list) && 0 <= (member) && (member) < (list)->size)
/* Adds @node to the list pointed to by @list_ptr at the given @position. If
 * @position is -1 the node is added at the end. */
struct dom_node_list *
add_to_dom_node_list(struct dom_node_list **list_ptr,
		     struct dom_node *node, int position);

/* Releases @list. */
void done_dom_node_list(struct dom_node_list *list);
/* Returns the position or index where the @node should be inserted into the
 * node @list in order for the list to stay alphabetically sorted. Assumes
 * that @list is already sorted properly. */
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
/* Looks up the @node_map for a node matching the requested type and name.
 * The @subtype may be 0, indicating an unknown subtype, in which case only
 * the name should be tested; else it will indicate either the element or
 * attribute private subtype. */
struct dom_node *
get_dom_node_map_entry(struct dom_node_list *node_map,
		       enum dom_node_type type, uint16_t subtype,
		       struct dom_string *name);
/* Backend of the init_dom_node() and add_dom_node() macros, which pass
 * __FILE__ and __LINE__ of the call site as @file and @line. */
struct dom_node *
init_dom_node_(unsigned char *file, int line,
		struct dom_node *parent, enum dom_node_type type,
		unsigned char *string, size_t length);

/* Creates a node without a parent. */
#define init_dom_node(type, string, length) \
	init_dom_node_(__FILE__, __LINE__, NULL, type, string, length)

/* Creates a node with the given @parent. */
#define add_dom_node(parent, type, string, length) \
	init_dom_node_(__FILE__, __LINE__, parent, type, string, length)
/* Creates an element node named by @string / @length under @parent. */
#define add_dom_element(parent, string, length) \
	add_dom_node(parent, DOM_NODE_ELEMENT, string, length)
260 static inline struct dom_node *
261 add_dom_attribute(struct dom_node *parent, unsigned char *string, int length,
262 unsigned char *value, size_t valuelen)
264 struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, string, length);
266 if (node && value) {
267 set_dom_string(&node->data.attribute.value, value, valuelen);
270 return node;
273 static inline struct dom_node *
274 add_dom_proc_instruction(struct dom_node *parent, unsigned char *string, int length,
275 unsigned char *instruction, size_t instructionlen)
277 struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string, length);
279 if (node && instruction) {
280 set_dom_string(&node->data.proc_instruction.instruction, instruction, instructionlen);
283 return node;
/* Removes the node and all its children and free()s itself */
void done_dom_node(struct dom_node *node);

/* Returns the name of the node in an allocated string. */
struct dom_string *get_dom_node_name(struct dom_node *node);

/* Returns the value of the node or NULL if no value is defined for the node
 * type. */
struct dom_string *get_dom_node_value(struct dom_node *node);

/* Returns the name used for identifying the node type. */
struct dom_string *get_dom_node_type_name(enum dom_node_type type);
/* Returns a pointer to a node list containing attributes: the address of the
 * attribute map for element nodes, NULL for every other node type. @node is
 * evaluated more than once and must not be NULL. */
#define get_dom_node_attributes(node) \
	((node)->type == DOM_NODE_ELEMENT ? &(node)->data.element.map \
					  : NULL)
304 static inline struct dom_node_list **
305 get_dom_node_list(struct dom_node *parent, struct dom_node *node)
307 switch (parent->type) {
308 case DOM_NODE_DOCUMENT:
309 return &parent->data.document.children;
311 case DOM_NODE_ELEMENT:
312 switch (node->type) {
313 case DOM_NODE_ATTRIBUTE:
314 return &parent->data.element.map;
316 default:
317 return &parent->data.element.children;
320 case DOM_NODE_DOCUMENT_TYPE:
321 switch (node->type) {
322 case DOM_NODE_ENTITY:
323 return &parent->data.document_type.entities;
325 case DOM_NODE_NOTATION:
326 return &parent->data.document_type.notations;
328 default:
329 return NULL;
332 case DOM_NODE_PROCESSING_INSTRUCTION:
333 switch (node->type) {
334 case DOM_NODE_ATTRIBUTE:
335 return &parent->data.proc_instruction.map;
337 default:
338 return NULL;
341 default:
342 return NULL;
346 #endif