Implement matching of element relations for DOM selection
[elinks/images.git] / src / document / dom / select.c
bloba68b72beb1c2838a4a71bb8d4e2768e00b3b79ac
1 /* DOM node selection */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include "elinks.h"
9 #include "document/css/scanner.h"
10 #include "document/dom/dom.h"
11 #include "document/dom/node.h"
12 #include "document/dom/select.h"
13 #include "document/dom/stack.h"
14 #include "util/memory.h"
15 #include "util/scanner.h"
16 #include "util/string.h"
19 /* Selector parsing: */
21 /* Maps the content of a scanner token to a pseudo-class or -element ID. */
22 static enum dom_select_pseudo
23 get_dom_select_pseudo(struct scanner_token *token)
25 static struct {
26 struct dom_string string;
27 enum dom_select_pseudo pseudo;
28 } pseudo_info[] = {
30 #define INIT_DOM_SELECT_PSEUDO_STRING(str, type) \
31 { INIT_DOM_STRING(str, -1), DOM_SELECT_PSEUDO_##type }
33 INIT_DOM_SELECT_PSEUDO_STRING("first-line", FIRST_LINE),
34 INIT_DOM_SELECT_PSEUDO_STRING("first-letter", FIRST_LETTER),
35 INIT_DOM_SELECT_PSEUDO_STRING("selection", SELECTION),
36 INIT_DOM_SELECT_PSEUDO_STRING("after", AFTER),
37 INIT_DOM_SELECT_PSEUDO_STRING("before", BEFORE),
38 INIT_DOM_SELECT_PSEUDO_STRING("link", LINK),
39 INIT_DOM_SELECT_PSEUDO_STRING("visited", VISITED),
40 INIT_DOM_SELECT_PSEUDO_STRING("active", ACTIVE),
41 INIT_DOM_SELECT_PSEUDO_STRING("hover", HOVER),
42 INIT_DOM_SELECT_PSEUDO_STRING("focus", FOCUS),
43 INIT_DOM_SELECT_PSEUDO_STRING("target", TARGET),
44 INIT_DOM_SELECT_PSEUDO_STRING("enabled", ENABLED),
45 INIT_DOM_SELECT_PSEUDO_STRING("disabled", DISABLED),
46 INIT_DOM_SELECT_PSEUDO_STRING("checked", CHECKED),
47 INIT_DOM_SELECT_PSEUDO_STRING("indeterminate", INDETERMINATE),
49 /* Content pseudo-classes: */
51 INIT_DOM_SELECT_PSEUDO_STRING("contains", CONTAINS),
53 /* Structural pseudo-classes: */
55 INIT_DOM_SELECT_PSEUDO_STRING("nth-child", NTH_CHILD),
56 INIT_DOM_SELECT_PSEUDO_STRING("nth-last-child", NTH_LAST_CHILD),
57 INIT_DOM_SELECT_PSEUDO_STRING("first-child", FIRST_CHILD),
58 INIT_DOM_SELECT_PSEUDO_STRING("last-child", LAST_CHILD),
59 INIT_DOM_SELECT_PSEUDO_STRING("only-child", ONLY_CHILD),
61 INIT_DOM_SELECT_PSEUDO_STRING("nth-of-type", NTH_TYPE),
62 INIT_DOM_SELECT_PSEUDO_STRING("nth-last-of-type",NTH_LAST_TYPE),
63 INIT_DOM_SELECT_PSEUDO_STRING("first-of-type", FIRST_TYPE),
64 INIT_DOM_SELECT_PSEUDO_STRING("last-of-type", LAST_TYPE),
65 INIT_DOM_SELECT_PSEUDO_STRING("only-of-type", ONLY_TYPE),
67 INIT_DOM_SELECT_PSEUDO_STRING("root", ROOT),
68 INIT_DOM_SELECT_PSEUDO_STRING("empty", EMPTY),
70 #undef INIT_DOM_SELECT_PSEUDO_STRING
73 struct dom_string string;
74 int i;
76 set_dom_string(&string, token->string, token->length);
78 for (i = 0; i < sizeof_array(pseudo_info); i++)
79 if (!dom_string_casecmp(&pseudo_info[i].string, &string))
80 return pseudo_info[i].pseudo;
82 return DOM_SELECT_PSEUDO_UNKNOWN;
85 /* Parses attribute selector. For example '[foo="bar"]' or '[foo|="boo"]'. */
86 static enum dom_exception_code
87 parse_dom_select_attribute(struct dom_select_node *sel, struct scanner *scanner)
89 struct scanner_token *token = get_scanner_token(scanner);
91 /* Get '['. */
93 if (token->type != '[')
94 return DOM_ERR_INVALID_STATE;
96 /* Get the attribute name. */
98 token = get_next_scanner_token(scanner);
99 if (!token || token->type != CSS_TOKEN_IDENT)
100 return DOM_ERR_SYNTAX;
102 set_dom_string(&sel->node.string, token->string, token->length);
104 /* Get the optional '=' combo or ending ']'. */
106 token = get_next_scanner_token(scanner);
107 if (!token) return DOM_ERR_SYNTAX;
109 switch (token->type) {
110 case ']':
111 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_ANY;
112 return DOM_ERR_NONE;
114 case CSS_TOKEN_SELECT_SPACE_LIST:
115 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
116 break;
118 case CSS_TOKEN_SELECT_HYPHEN_LIST:
119 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_HYPHEN_LIST;
120 break;
122 case CSS_TOKEN_SELECT_BEGIN:
123 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_BEGIN;
124 break;
126 case CSS_TOKEN_SELECT_END:
127 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_END;
128 break;
130 case CSS_TOKEN_SELECT_CONTAINS:
131 sel->match.attribute |= DOM_SELECT_ATTRIBUTE_CONTAINS;
132 break;
134 default:
135 return DOM_ERR_SYNTAX;
138 /* Get the required value. */
140 token = get_next_scanner_token(scanner);
141 if (!token) return DOM_ERR_SYNTAX;
143 switch (token->type) {
144 case CSS_TOKEN_IDENT:
145 case CSS_TOKEN_STRING:
146 set_dom_string(&sel->node.data.attribute.value, token->string, token->length);
147 break;
149 default:
150 return DOM_ERR_SYNTAX;
153 /* Get the ending ']'. */
155 token = get_next_scanner_token(scanner);
156 if (token && token->type == ']')
157 return DOM_ERR_NONE;
159 return DOM_ERR_SYNTAX;
162 /* Parse:
164 * 0n+1 / 1
165 * 2n+0 / 2n
166 * 2n+1
167 * -0n+2
168 * -0n+1 / -1
169 * 1n+0 / n+0 / n
170 * 0n+0
173 /* FIXME: Move somewhere else? util/scanner.h? */
174 static size_t
175 get_scanner_token_number(struct scanner_token *token)
177 size_t number = 0;
179 while (token->length > 0 && isdigit(token->string[0])) {
180 size_t old_number = number;
182 number *= 10;
184 /* -E2BIG */
185 if (old_number > number)
186 return -1;
188 number += token->string[0] - '0';
189 token->string++, token->length--;
192 return number;
195 /* Parses the '(...)' part of ':nth-of-type(...)' and ':nth-child(...)'. */
196 static enum dom_exception_code
197 parse_dom_select_nth_arg(struct dom_select_nth_match *nth, struct scanner *scanner)
199 struct scanner_token *token = get_next_scanner_token(scanner);
200 int sign = 1;
201 int number = -1;
203 if (!token || token->type != '(')
204 return DOM_ERR_SYNTAX;
206 token = get_next_scanner_token(scanner);
207 if (!token)
208 return DOM_ERR_SYNTAX;
210 switch (token->type) {
211 case CSS_TOKEN_IDENT:
212 if (scanner_token_contains(token, "even")) {
213 nth->step = 2;
214 nth->index = 0;
216 } else if (scanner_token_contains(token, "odd")) {
217 nth->step = 2;
218 nth->index = 1;
220 } else {
221 /* Check for 'n' ident below. */
222 break;
225 if (skip_css_tokens(scanner, ')'))
226 return DOM_ERR_NONE;
228 return DOM_ERR_SYNTAX;
230 case '-':
231 sign = -1;
233 token = get_next_scanner_token(scanner);
234 if (!token) return DOM_ERR_SYNTAX;
236 if (token->type != CSS_TOKEN_IDENT)
237 break;
239 if (token->type != CSS_TOKEN_NUMBER)
240 return DOM_ERR_SYNTAX;
241 /* Fall-through */
243 case CSS_TOKEN_NUMBER:
244 number = get_scanner_token_number(token);
245 if (number < 0)
246 return DOM_ERR_INVALID_STATE;
248 token = get_next_scanner_token(scanner);
249 if (!token) return DOM_ERR_SYNTAX;
250 break;
252 default:
253 return DOM_ERR_SYNTAX;
256 /* The rest can contain n+ part */
257 switch (token->type) {
258 case CSS_TOKEN_IDENT:
259 if (!scanner_token_contains(token, "n"))
260 return DOM_ERR_SYNTAX;
262 nth->step = sign * number;
264 token = get_next_scanner_token(scanner);
265 if (!token) return DOM_ERR_SYNTAX;
267 if (token->type != '+')
268 break;
270 token = get_next_scanner_token(scanner);
271 if (!token) return DOM_ERR_SYNTAX;
273 if (token->type != CSS_TOKEN_NUMBER)
274 break;
276 number = get_scanner_token_number(token);
277 if (number < 0)
278 return DOM_ERR_INVALID_STATE;
280 nth->index = sign * number;
281 break;
283 default:
284 nth->step = 0;
285 nth->index = sign * number;
288 if (skip_css_tokens(scanner, ')'))
289 return DOM_ERR_NONE;
291 return DOM_ERR_SYNTAX;
294 /* Parse a pseudo-class or -element with the syntax: ':<ident>'. */
295 static enum dom_exception_code
296 parse_dom_select_pseudo(struct dom_select *select, struct dom_select_node *sel,
297 struct scanner *scanner)
299 struct scanner_token *token = get_scanner_token(scanner);
300 enum dom_select_pseudo pseudo;
301 enum dom_exception_code code;
303 /* Skip double :'s in front of some pseudo's (::first-line, etc.) */
304 do {
305 token = get_next_scanner_token(scanner);
306 } while (token && token->type == ':');
308 if (!token || token->type != CSS_TOKEN_IDENT)
309 return DOM_ERR_SYNTAX;
311 pseudo = get_dom_select_pseudo(token);
312 switch (pseudo) {
313 case DOM_SELECT_PSEUDO_UNKNOWN:
314 return DOM_ERR_NOT_FOUND;
316 case DOM_SELECT_PSEUDO_CONTAINS:
317 /* FIXME: E:contains("text") */
318 break;
320 case DOM_SELECT_PSEUDO_NTH_CHILD:
321 case DOM_SELECT_PSEUDO_NTH_LAST_CHILD:
322 code = parse_dom_select_nth_arg(&sel->nth_child, scanner);
323 if (code != DOM_ERR_NONE)
324 return code;
326 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
327 break;
329 case DOM_SELECT_PSEUDO_FIRST_CHILD:
330 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
331 set_dom_select_nth_match(&sel->nth_child, 0, 1);
332 break;
334 case DOM_SELECT_PSEUDO_LAST_CHILD:
335 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
336 set_dom_select_nth_match(&sel->nth_child, 0, -1);
337 break;
339 case DOM_SELECT_PSEUDO_ONLY_CHILD:
340 sel->match.element |= DOM_SELECT_ELEMENT_NTH_CHILD;
341 set_dom_select_nth_match(&sel->nth_child, 0, 0);
342 break;
344 case DOM_SELECT_PSEUDO_NTH_TYPE:
345 case DOM_SELECT_PSEUDO_NTH_LAST_TYPE:
346 code = parse_dom_select_nth_arg(&sel->nth_type, scanner);
347 if (code != DOM_ERR_NONE)
348 return code;
350 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
351 break;
353 case DOM_SELECT_PSEUDO_FIRST_TYPE:
354 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
355 set_dom_select_nth_match(&sel->nth_type, 0, 1);
356 break;
358 case DOM_SELECT_PSEUDO_LAST_TYPE:
359 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
360 set_dom_select_nth_match(&sel->nth_type, 0, -1);
361 break;
363 case DOM_SELECT_PSEUDO_ONLY_TYPE:
364 sel->match.element |= DOM_SELECT_ELEMENT_NTH_TYPE;
365 set_dom_select_nth_match(&sel->nth_type, 0, 0);
366 break;
368 case DOM_SELECT_PSEUDO_ROOT:
369 sel->match.element |= DOM_SELECT_ELEMENT_ROOT;
370 break;
372 case DOM_SELECT_PSEUDO_EMPTY:
373 sel->match.element |= DOM_SELECT_ELEMENT_EMPTY;
374 break;
376 default:
377 /* It's a bitflag! */
378 select->pseudo |= pseudo;
381 return DOM_ERR_NONE;
384 /* The element relation flags are mutual exclusive. This macro can be used
385 * for checking if anyflag is set. */
386 #define get_element_relation(sel) \
387 ((sel)->match.element & DOM_SELECT_RELATION_FLAGS)
389 /* Parse a CSS3 selector and add selector nodes to the @select struct. */
390 static enum dom_exception_code
391 parse_dom_select(struct dom_select *select, struct dom_stack *stack,
392 unsigned char *string, int length)
394 struct scanner scanner;
395 struct dom_select_node sel;
397 init_scanner(&scanner, &css_scanner_info, string, string + length);
399 memset(&sel, 0, sizeof(sel));
401 while (scanner_has_tokens(&scanner)) {
402 struct scanner_token *token = get_scanner_token(&scanner);
403 enum dom_exception_code code;
404 struct dom_select_node *select_node;
406 assert(token);
408 if (token->type == '{'
409 || token->type == '}'
410 || token->type == ';'
411 || token->type == ',')
412 break;
414 /* Examine the selector fragment */
416 switch (token->type) {
417 case CSS_TOKEN_IDENT:
418 sel.node.type = DOM_NODE_ELEMENT;
419 set_dom_string(&sel.node.string, token->string, token->length);
420 if (token->length == 1 && token->string[0] == '*')
421 sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
422 break;
424 case CSS_TOKEN_HASH:
425 case CSS_TOKEN_HEX_COLOR:
426 /* ID fragment */
427 sel.node.type = DOM_NODE_ATTRIBUTE;
428 sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
429 /* Skip the leading '#'. */
430 token->string++, token->length--;
431 break;
433 case '[':
434 sel.node.type = DOM_NODE_ATTRIBUTE;
435 code = parse_dom_select_attribute(&sel, &scanner);
436 if (code != DOM_ERR_NONE)
437 return code;
438 break;
440 case '.':
441 token = get_next_scanner_token(&scanner);
442 if (!token || token->type != CSS_TOKEN_IDENT)
443 return DOM_ERR_SYNTAX;
445 sel.node.type = DOM_NODE_ATTRIBUTE;
446 sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
447 set_dom_string(&sel.node.string, "class", -1);
448 set_dom_string(&sel.node.data.attribute.value, token->string, token->length);
449 break;
451 case ':':
452 code = parse_dom_select_pseudo(select, &sel, &scanner);
453 if (code != DOM_ERR_NONE)
454 return code;
455 break;
457 case '>':
458 if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
459 return DOM_ERR_SYNTAX;
460 sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD;
461 break;
463 case '+':
464 if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
465 return DOM_ERR_SYNTAX;
466 sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT;
467 break;
469 case '~':
470 if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
471 return DOM_ERR_SYNTAX;
472 sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT;
473 break;
475 default:
476 return DOM_ERR_SYNTAX;
479 skip_scanner_token(&scanner);
481 if (sel.node.type == DOM_NODE_UNKNOWN)
482 continue;
484 WDBG("Adding %s: %.*s", (sel.node.type == DOM_NODE_ELEMENT) ? "element" : "attr", sel.node.string.length, sel.node.string.string);
485 /* FIXME: Use the stack to push added nodes and to get the
486 * parent node. */
487 select_node = mem_calloc(1, sizeof(*select_node));
488 copy_struct(select_node, &sel);
490 if (select_node->node.parent) {
491 struct dom_node *node = &select_node->node;
492 struct dom_node *parent = node->parent;
493 struct dom_node_list **list = get_dom_node_list(parent, node);
494 int sort = (node->type == DOM_NODE_ATTRIBUTE);
495 int index;
497 assertm(list, "Adding node to bad parent",
498 get_dom_node_type_name(node->type),
499 get_dom_node_type_name(parent->type));
501 index = *list && (*list)->size > 0 && sort
502 ? get_dom_node_map_index(*list, node) : -1;
504 if (!add_to_dom_node_list(list, node, index)) {
505 done_dom_node(node);
506 return DOM_ERR_INVALID_STATE;
508 } else {
509 assert(!select->selector);
510 select->selector = select_node;
513 memset(&sel, 0, sizeof(sel));
514 sel.node.parent = &select_node->node;
517 if (select->selector)
518 return DOM_ERR_NONE;
520 return DOM_ERR_INVALID_STATE;
523 /* Basically this is just a wrapper for parse_dom_select() to ease error
524 * handling. */
525 struct dom_select *
526 init_dom_select(enum dom_select_syntax syntax,
527 unsigned char *string, int length)
529 struct dom_select *select = mem_calloc(1, sizeof(select));
530 struct dom_stack stack;
531 enum dom_exception_code code;
533 init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
535 code = parse_dom_select(select, &stack, string, length);
536 done_dom_stack(&stack);
538 if (code == DOM_ERR_NONE)
539 return select;
541 done_dom_select(select);
543 return NULL;
546 void
547 done_dom_select(struct dom_select *select)
549 if (select->selector) {
550 struct dom_node *node = (struct dom_node *) select->selector;
552 /* This will recursively free all children select nodes. */
553 done_dom_node(node);
556 mem_free(select);
560 /* DOM node selection: */
562 /* This struct stores data related to the 'application' of a DOM selector
563 * on a DOM tree or stream. */
564 struct dom_select_data {
565 /* The selector matching stack. The various selector nodes are pushed
566 * onto this stack as they are matched (and later popped when they are
567 * no longer 'reachable', that is, has been popped from the DOM tree or
568 * stream. This way the selector can match each selector node multiple
569 * times and the selection is a simple matter of matching the current
570 * node against each state on this stack. */
571 struct dom_stack stack;
573 /* Reference to the selector. */
574 struct dom_select *select;
576 /* The list of nodes who have been matched / selected. */
577 struct dom_node_list *list;
580 /* This state struct is used for the select data stack and holds info about the
581 * node that was matched. */
582 struct dom_select_state {
583 /* The matched node. This is always an element node. */
584 struct dom_node *node;
587 /* Get a child node of a given type. By design, a selector node can
588 * only have one child per type of node. */
589 static struct dom_select_node *
590 get_child_dom_select_node(struct dom_select_node *selector,
591 enum dom_node_type type)
593 struct dom_node_list *children = selector->node.data.element.children;
594 struct dom_node *node;
595 int index;
597 if (!children)
598 return NULL;
600 foreach_dom_node (children, node, index) {
601 if (node->type == type)
602 return (struct dom_select_node *) node;
605 return NULL;
609 #define has_attribute_match(selector, name) \
610 ((selector)->match.attribute & (name))
612 static int
613 match_attribute_value(struct dom_select_node *selector, struct dom_node *node)
615 struct dom_string *selvalue = &selector->node.data.attribute.value;
616 struct dom_string *value = &node->data.attribute.value;
618 /* The attribute selector value should atleast be contained in the
619 * attribute value. */
620 if (value->length < selvalue->length)
621 return 0;
623 /* FIXME: Combine the 3 following to use an offset to specify where in
624 * value, selvalue should match. */
626 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_EXACT)
627 && dom_string_casecmp(value, selvalue))
628 return 0;
630 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_BEGIN))
631 return 0;
633 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_END))
634 return 0;
636 /* FIXME: Combine the 3 following to simply strstr()-search the value
637 * and based on a char group check if it is separated either by
638 * begining, end or the values in the char group: '-' for
639 * DOM_SELECT_ATTRIBUTE_HYPHEN_LIST, etc. */
641 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_SPACE_LIST))
642 return 0;
644 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_HYPHEN_LIST))
645 return 0;
647 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_CONTAINS))
648 return 0;
650 return 1;
653 /* Match the attribute of an element @node against attribute selector nodes
654 * of a given @base. */
655 static int
656 match_attribute_selectors(struct dom_select_node *base, struct dom_node *node)
658 struct dom_node_list *attrs = node->data.element.map;
659 struct dom_node_list *selnodes = base->node.data.element.map;
660 struct dom_node *selnode;
661 size_t index;
663 assert(base->node.type == DOM_NODE_ELEMENT
664 && node->type == DOM_NODE_ELEMENT);
666 /* If there are no attribute selectors that is a clean match ... */
667 if (!selnodes)
668 return 1;
670 /* ... the opposite goes if there are no attributes to match. */
671 if (!attrs)
672 return 0;
674 foreach_dom_node (selnodes, selnode, index) {
675 struct dom_select_node *selector = (void *) selnode;
676 struct dom_node *attr;
678 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ID)) {
679 size_t idindex;
681 attr = NULL;
682 foreach_dom_node (attrs, attr, idindex) {
683 if (attr->data.attribute.id)
684 break;
687 if (!is_dom_node_list_member(attrs, idindex))
688 attr = NULL;
690 } else {
691 attr = get_dom_node_map_entry(attrs, DOM_NODE_ATTRIBUTE,
692 selnode->data.attribute.type,
693 &selnode->string);
696 if (!attr)
697 return 0;
699 if (has_attribute_match(selector, DOM_SELECT_ATTRIBUTE_ANY))
700 continue;
702 if (!match_attribute_value(selector, attr))
703 return 0;
706 return 1;
709 /* XXX: Assume the first context is the one! */
710 #define get_dom_select_state(stack, state) \
711 ((struct dom_select_state *) get_dom_stack_state_data((stack)->contexts, state))
713 static int
714 match_element_relation(struct dom_select_node *selector, struct dom_node *node,
715 struct dom_stack *stack)
717 struct dom_stack_state *state;
718 enum dom_select_element_match relation = get_element_relation(selector);
719 int i, index;
721 assert(relation);
723 /* When matching any relation there must be a parent, either so that
724 * the node is a descendant or it is possible to check for siblings. */
725 if (!node->parent)
726 return 0;
728 if (relation != DOM_SELECT_RELATION_DIRECT_CHILD) {
729 /* When looking for preceeding siblings of the current node,
730 * the current node cannot be first or not in the list (-1). */
731 index = get_dom_node_list_index(node->parent, node);
732 if (index < 1)
733 return 0;
734 } else {
735 index = -1;
738 /* Find states which hold the parent of the current selector
739 * and check if the parent selector's node is the parent of the
740 * current node. */
741 foreachback_dom_stack_state(stack, state, i) {
742 struct dom_node *selnode;
744 /* We are only interested in states which hold the parent of
745 * the current selector. */
746 if (state->node != selector->node.parent)
747 continue;
749 selnode = get_dom_select_state(stack, state)->node;
751 if (relation == DOM_SELECT_RELATION_DIRECT_CHILD) {
752 /* Check if the parent selector's node is the parent of
753 * the current node. */
754 if (selnode == node->parent)
755 return 1;
757 } else {
758 int sibindex;
760 /* Check if they are siblings. */
761 if (selnode->parent != node->parent)
762 continue;
764 sibindex = get_dom_node_list_index(node->parent, selnode);
766 if (relation == DOM_SELECT_RELATION_DIRECT_ADJACENT) {
767 /* Check if the sibling node immediately
768 * preceeds the current node. */
769 if (sibindex + 1 == index)
770 return 1;
772 } else { /* DOM_SELECT_RELATION_INDIRECT_ADJACENT */
773 /* Check if the sibling node preceeds the
774 * current node. */
775 if (sibindex < index)
776 return 1;
781 return 0;
784 #define has_element_match(selector, name) \
785 ((selector)->match.element & (name))
787 static int
788 match_element_selector(struct dom_select_node *selector, struct dom_node *node,
789 struct dom_stack *stack)
791 assert(node && node->type == DOM_NODE_ELEMENT);
793 if (!has_element_match(selector, DOM_SELECT_ELEMENT_UNIVERSAL)
794 && dom_node_casecmp(&selector->node, node))
795 return 0;
797 if (get_element_relation(selector) != DOM_SELECT_RELATION_DESCENDANT
798 && !match_element_relation(selector, node, stack))
799 return 0;
801 /* Root nodes either have no parents or are the single child of the
802 * document node. */
803 if (has_element_match(selector, DOM_SELECT_ELEMENT_ROOT)
804 && node->parent) {
805 if (node->parent->type != DOM_NODE_DOCUMENT
806 || node->parent->data.document.children->size > 1)
807 return 0;
810 if (has_element_match(selector, DOM_SELECT_ELEMENT_EMPTY)
811 && node->data.element.children
812 && node->data.element.children->size > 0)
813 return 0;
815 if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_CHILD)) {
816 /* FIXME */
817 return 0;
820 if (has_element_match(selector, DOM_SELECT_ELEMENT_NTH_TYPE)) {
821 /* FIXME */
822 return 0;
825 /* Check attribute selectors. */
826 if (selector->node.data.element.map
827 && !match_attribute_selectors(selector, node))
828 return 0;
830 return 1;
834 #define get_dom_select_data(stack) ((stack)->current->data)
836 /* Matches an element node being visited against the current selector stack. */
837 static void
838 dom_select_push_element(struct dom_stack *stack, struct dom_node *node, void *data)
840 struct dom_select_data *select_data = get_dom_select_data(stack);
841 struct dom_stack_state *state;
842 int pos;
844 WDBG("Push element %.*s.", node->string.length, node->string.string);
846 foreach_dom_stack_state(&select_data->stack, state, pos) {
847 struct dom_select_node *selector = (void *) state->node;
849 /* FIXME: Since the same dom_select_node can be multiple times
850 * on the select_data->stack, cache what select nodes was
851 * matches so that it is only checked once. */
853 if (!match_element_selector(selector, node, &select_data->stack))
854 continue;
856 WDBG("Matched element: %.*s.", node->string.length, node->string.string);
857 /* This node is matched, so push the next selector node to
858 * match on the stack. */
859 selector = get_child_dom_select_node(selector, DOM_NODE_ELEMENT);
860 if (selector)
861 push_dom_node(&select_data->stack, &selector->node);
865 /* Ensures that nodes, no longer 'reachable' on the stack do not have any
866 * states associated with them on the select data stack. */
867 static void
868 dom_select_pop_element(struct dom_stack *stack, struct dom_node *node, void *data)
870 struct dom_select_data *select_data = get_dom_select_data(stack);
871 struct dom_stack_state *state;
872 int index;
874 WDBG("Pop element: %.*s", node->string.length, node->string.string);
875 stack = &select_data->stack;
877 foreachback_dom_stack_state (stack, state, index) {
878 struct dom_select_node *selector = (void *) state->node;
879 struct dom_select_state *select_state;
881 /* XXX: Assume that it is the first stack context! */
882 select_state = get_dom_stack_state_data(stack->contexts, state);
883 if (select_state->node == node) {
884 pop_dom_state(stack, state);
885 WDBG("Remove element.");
886 continue;
889 /* FIXME: Pop states that no longer lives up to a relation. */
890 switch (get_element_relation(selector)) {
891 case DOM_SELECT_RELATION_DIRECT_CHILD: /* E > F */
892 case DOM_SELECT_RELATION_DIRECT_ADJACENT: /* E + F */
893 case DOM_SELECT_RELATION_INDIRECT_ADJACENT: /* E ~ F */
894 case DOM_SELECT_RELATION_DESCENDANT: /* E F */
895 default:
896 break;
901 /* For now this is only for matching the ':contains(<string>)' pseudo-class.
902 * Any node which can contain text and thus characters from the given <string>
903 * are handled in this common callback. */
904 static void
905 dom_select_push_text(struct dom_stack *stack, struct dom_node *node, void *data)
907 struct dom_select_data *select_data = get_dom_select_data(stack);
908 struct dom_stack_state *state = get_dom_stack_top(&select_data->stack);
909 struct dom_select_node *selector = (void *) state->node;
910 struct dom_select_node *text_sel = get_child_dom_select_node(selector, DOM_NODE_TEXT);
911 struct dom_string *text;
913 WDBG("Text node: %d chars", node->string.length);
915 if (!text_sel)
916 return;
918 text = &text_sel->node.string;
920 switch (node->type) {
921 case DOM_NODE_TEXT:
922 case DOM_NODE_CDATA_SECTION:
923 case DOM_NODE_ENTITY_REFERENCE:
924 break;
925 default:
926 ERROR("Unhandled type");
930 /* Context info for interacting with the DOM tree or stream stack. */
931 static struct dom_stack_context_info dom_select_context_info = {
932 /* Object size: */ 0,
933 /* Push: */
935 /* */ NULL,
936 /* DOM_NODE_ELEMENT */ dom_select_push_element,
937 /* DOM_NODE_ATTRIBUTE */ NULL,
938 /* DOM_NODE_TEXT */ dom_select_push_text,
939 /* DOM_NODE_CDATA_SECTION */ dom_select_push_text,
940 /* DOM_NODE_ENTITY_REFERENCE */ dom_select_push_text,
941 /* DOM_NODE_ENTITY */ NULL,
942 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
943 /* DOM_NODE_COMMENT */ NULL,
944 /* DOM_NODE_DOCUMENT */ NULL,
945 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
946 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
947 /* DOM_NODE_NOTATION */ NULL,
949 /* Pop: */
951 /* */ NULL,
952 /* DOM_NODE_ELEMENT */ dom_select_pop_element,
953 /* DOM_NODE_ATTRIBUTE */ NULL,
954 /* DOM_NODE_TEXT */ NULL,
955 /* DOM_NODE_CDATA_SECTION */ NULL,
956 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
957 /* DOM_NODE_ENTITY */ NULL,
958 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
959 /* DOM_NODE_COMMENT */ NULL,
960 /* DOM_NODE_DOCUMENT */ NULL,
961 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
962 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
963 /* DOM_NODE_NOTATION */ NULL,
967 /* Context info related to the private select data stack of matched nodes. */
968 static struct dom_stack_context_info dom_select_data_context_info = {
969 /* Object size: */ sizeof(struct dom_select_state),
970 /* Push: */
972 /* */ NULL,
973 /* DOM_NODE_ELEMENT */ NULL,
974 /* DOM_NODE_ATTRIBUTE */ NULL,
975 /* DOM_NODE_TEXT */ NULL,
976 /* DOM_NODE_CDATA_SECTION */ NULL,
977 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
978 /* DOM_NODE_ENTITY */ NULL,
979 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
980 /* DOM_NODE_COMMENT */ NULL,
981 /* DOM_NODE_DOCUMENT */ NULL,
982 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
983 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
984 /* DOM_NODE_NOTATION */ NULL,
986 /* Pop: */
988 /* */ NULL,
989 /* DOM_NODE_ELEMENT */ NULL,
990 /* DOM_NODE_ATTRIBUTE */ NULL,
991 /* DOM_NODE_TEXT */ NULL,
992 /* DOM_NODE_CDATA_SECTION */ NULL,
993 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
994 /* DOM_NODE_ENTITY */ NULL,
995 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
996 /* DOM_NODE_COMMENT */ NULL,
997 /* DOM_NODE_DOCUMENT */ NULL,
998 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
999 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
1000 /* DOM_NODE_NOTATION */ NULL,
1005 struct dom_node_list *
1006 select_dom_nodes(struct dom_select *select, struct dom_node *root)
1008 struct dom_select_data select_data;
1009 struct dom_stack stack;
1011 memset(&select_data, 0, sizeof(select_data));
1013 select_data.select = select;;
1015 init_dom_stack(&stack, DOM_STACK_KEEP_NODES);
1016 add_dom_stack_context(&stack, &select_data,
1017 &dom_select_context_info);
1019 init_dom_stack(&select_data.stack, DOM_STACK_KEEP_NODES);
1020 add_dom_stack_context(&select_data.stack, &select_data,
1021 &dom_select_data_context_info);
1023 if (push_dom_node(&select_data.stack, &select->selector->node)) {
1024 get_dom_stack_top(&select_data.stack)->immutable = 1;
1025 walk_dom_nodes(&stack, root);
1028 done_dom_stack(&select_data.stack);
1029 done_dom_stack(&stack);
1031 return select_data.list;