Properly handle nested sublists by folding into previous list item.
[htmlpurifier.git] / library / HTMLPurifier / ChildDef / List.php
blobcdaa2893a7e2d534d56ba26ccaeea95b70d84cf1
1 <?php
/**
 * Definition for list containers ul and ol.
 *
 * Besides passing li children through, this definition repairs two kinds
 * of unexpected direct children instead of rejecting them:
 *  - a ul/ol sitting directly inside the list is folded into the previous
 *    li (or into a freshly created li when no li has been seen yet);
 *  - any other non-whitespace token is wrapped in a new li of its own.
 */
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
    public $type = 'list';

    // lying a little bit, so that we can handle ul and ol ourselves
    // XXX: This whole business with 'wrap' is all a bit unsatisfactory
    public $elements = array('li' => true, 'ul' => true, 'ol' => true);

    /**
     * Validates (and, where needed, rewrites) the child tokens of a list.
     *
     * @param array $tokens_of_children Child tokens of the ul/ol, in order.
     * @param mixed $config  Not used by this implementation.
     * @param mixed $context Not used by this implementation.
     * @return bool|array false when the parent node should be removed (no
     *         tokens, only whitespace, or a violated invariant), true when
     *         the children compare equal to the rewritten result, otherwise
     *         the rewritten token array.
     */
    public function validateChildren($tokens_of_children, $config, $context)
    {
        // Flag for subclasses
        $this->whitespace = false;

        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) {
            return false;
        }

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // whether an li has been encountered at the top level so far
        $seen_li = false;
        // whether we opened (or re-opened) an li that we still have to close
        $need_close_li = false;

        foreach ($tokens_of_children as $token) {
            // whitespace is copied through and never affects the state machine
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // Depth 1 with a pending close means the content we tucked into
            // our own li has ended, so close that li before handling $token.
            if ($nesting == 1 && $need_close_li) {
                $result[] = new HTMLPurifier_Token_End('li');
                $nesting--;
                $need_close_li = false;
            }

            // decided BEFORE the depth is adjusted for this token
            $is_child = ($nesting == 0);

            if ($token instanceof HTMLPurifier_Token_Start) {
                $nesting++;
            } elseif ($token instanceof HTMLPurifier_Token_End) {
                $nesting--;
            }

            if ($is_child) {
                if ($token->name === 'li') {
                    // good
                    $seen_li = true;
                } elseif ($token->name === 'ul' || $token->name === 'ol') {
                    // we want to tuck this into the previous li
                    $need_close_li = true;
                    // account for the li that now encloses the sublist
                    $nesting++;
                    if (!$seen_li) {
                        // create a new li element
                        $result[] = new HTMLPurifier_Token_Start('li');
                    } else {
                        // backtrack until </li> found; tokens popped on the
                        // way (whitespace only) are not put back
                        while (true) {
                            $t = array_pop($result);
                            if ($t instanceof HTMLPurifier_Token_End) {
                                // XXX actually, these invariants could very plausibly be violated
                                // if we are doing silly things with modifying the set of allowed elements.
                                // FORTUNATELY, it doesn't make a difference, since the allowed
                                // elements are hard-coded here!
                                if ($t->name !== 'li') {
                                    // NOTE(review): with PHP's default handling E_USER_ERROR halts
                                    // the script; the return is only reached under a custom handler.
                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                                // dropping the </li> re-opens the previous item
                                break;
                            } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
                                if ($t->name !== 'li') {
                                    trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                                // an empty <li /> is replaced by an opening <li> carrying the same data
                                // XXX this should have a helper for it...
                                $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
                                break;
                            } else {
                                // empty() instead of a direct property read: $t is null once
                                // $result is exhausted and may be a token that does not define
                                // is_whitespace; both count as "not whitespace" without
                                // raising an extra warning first.
                                if (empty($t->is_whitespace)) {
                                    trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
                                    return false;
                                }
                            }
                        }
                    }
                } else {
                    // start wrapping (this doesn't precisely mimic
                    // browser behavior, but what browsers do is kind of
                    // hard to mimic in a standards compliant way
                    // XXX Actually, this has no impact in practice,
                    // because this gets handled earlier. Arguably,
                    // we should rip out all of that processing
                    $result[] = new HTMLPurifier_Token_Start('li');
                    $nesting++;
                    $seen_li = true;
                    $need_close_li = true;
                }
            }
            $result[] = $token;
        }

        // close an li we opened for the final child
        if ($need_close_li) {
            $result[] = new HTMLPurifier_Token_End('li');
        }
        if (empty($result)) {
            return false;
        }
        if ($all_whitespace) {
            return false;
        }
        // loose comparison on purpose: tokens are compared by value, so an
        // unchanged child list is reported as valid as-is
        if ($tokens_of_children == $result) {
            return true;
        }
        return $result;
    }
}
120 // vim: et sw=4 sts=4