Update code_sniffer build.xml file to be executable on our system
[phpbb.git] / phpBB / includes / bbcode / bbcode_parser_base.php
blobaf74f6c38eb51cba4e7bd0ea38b70a2e7da275fb
1 <?php
2 /**
4 * @package phpBB3
5 * @version $Id$
6 * @copyright (c) 2005 phpBB Group
7 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
9 */
/**
* @ignore
*/
if (defined('IN_PHPBB') === false)
{
    // Refuse to run when the file is requested directly instead of being included.
    exit;
}
20 /**
21 * A stack based BBCode parser.
24 abstract class phpbb_bbcode_parser_base
26 /**
27 * Array holding the BBCode definitions.
29 * This is all the documentation you'll find!
31 * 'tagName' => array( // The tag name must start with a letter and can consist only of letters and numbers.
32 * 'replace' => 'The open tag is replaced with this. "{attribute}" - Will be replaced with an existing attribute.',
33 * // Optional
34 * 'replace_func' => 'function_name', // Open tag is replaced with the value that this function returns. replace will not be used. The function will get the arguments given to the tag and the tag definition. It is your responsibility to validate the arguments.
35 * 'close' => 'The close tag is replaced by this. If set to bool(false) the tag won't need a closing tag.',
36 * // Optional
37 * 'close_shadow' => true, // If set, no closing tag will be needed, but the value close will be added as soon as the parent tag is closed or a tag which is not allowed in the tag is encountered.
38 * // Optional
39 * 'close_func' => 'function_name', // Close tag is replaced with the value that this function returns. close will not be used. If close is set to bool this might not function as expected.
40 * 'attributes' => array(
41 * 'attributeName' => array(
42 * 'replace' => 'Attribute replacement. Use string defined in self::$attr_value_replace as a replacement for the attributes value',
43 * 'type_check' => 'function_name', // Optional. Function name to check if the value of the attribute is allowed. It must return bool or a corrected string. It must accept the attribute value string.
44 * 'required' => true, // Optional. The attribute must be set and not empty for the tag to be parsed.
45 * ),
46 * // ...
47 * ),
48 * 'children' => array(
49 * true, // true allows all tags to be a child of this tag except for the other tags in the array. false allows only the tags in the array.
50 * 'tag2' => true,
51 * // ...
52 * ),
53 * 'parents' => array(true), // Same as 'children'.
54 * // Optional
55 * 'content_func' => 'function_name', // Applies function to the contents of the tag and replaces it with the output. Used only when the tag does not allow children. It must return the replacement string and accept the input string. This is not like HTML...
56 * ),
57 * 'tag2' => array(
58 * // ...
60 * NOTE: Use "_" as the name of the attribute assigned to the tag itself. (eg. from the tag [tag="value"] "_" will hold "value")
61 * NOTE: Use "__" for the content of a tag without children. (eg. for [u]something[/u] "__" will hold "something") This is not like HTML...
62 * NOTE: The following special tags exist: "__url" (child), "__smiley" (child) and "__global" (parent). They are to be used in the child/parent allowed/disallowed lists.
63 * @var array
65 protected $tags = array();
67 /**
68 * The smilies which are to be "parsed".
70 * Smilies are treated the same way as BBCodes (though BBcodes have precedence).
71 * Use "__smiley" to allow/disallow them in tags. Smileys can only be children.
73 * 'smiley' => 'replacement'
75 * @var array
77 protected $smilies = array();
79 /**
80 * Callback to be applied to all text nodes (in second_pass).
82 * @var mixed
84 protected $text_callback = null;
86 /**
87 * Used by first_pass and second_pass
89 * @var array
91 private $stack = array();
93 /**
94 * Regex to match BBCode tags.
96 * @var string
98 private $tag_regex = '\[(/?)([a-z][a-z0-9]*)(?:=(\'[^\']*\'|"[^"]*"))?((?: [a-z]+(?:\s?=\s?(?:\'[^\']*\'|"[^"]*"))?)*)\]';
101 * Regex for URL's
103 * @var string
105 private $url_regex = '(?>([a-z+]{2,}://|www\.))(?:[a-z0-9]+(?:\.[a-z0-9]+)?@)?(?:(?:[a-z](?:[a-z0-9]|(?<!-)-)*[a-z0-9])(?:\.[a-z](?:[a-z0-9]|(?<!-)-)*[a-z0-9])+|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(?:/[^\\/:?*"<>|\n]*[a-z0-9])*/?(?:\?[a-z0-9_.%]+(?:=[a-z0-9_.%:/+-]*)?(?:&[a-z0-9_.%]+(?:=[a-z0-9_.%:/+-]*)?)*)?(?:#[a-z0-9_%.]+)?';
108 * Regex to match attribute&value pairs.
110 * @var string
112 private $attribute_regex = '~([a-z]+)(?:\s?=\s?((?:\'[^\']*?\'|"[^"]*?")))?~i';
115 * Delimiter's ASCII code.
117 * @var int
119 private $delimiter = 0;
122 * This string will be replaced by the attribute value.
124 * @var string
126 private $attr_value_replace = '%s';
129 * First pass result.
131 * @var array
133 private $parsed = array();
134 private $parse_pos = 1;
137 * Parse flags
139 * @var int
141 protected $flags;
144 * Types
146 const TYPE_TAG = 1;
147 const TYPE_TAG_SIMPLE = 2;
148 const TYPE_CTAG = 3;
149 const TYPE_ABSTRACT_SMILEY = 4;
150 const TYPE_ABSTRACT_URL = 5;
153 * Feature flags
155 const PARSE_BBCODE = 1;
156 const PARSE_URLS = 2;
157 const PARSE_SMILIES = 4;
160 * Tag Backreferences.
163 const MATCH_CLOSING_TAG = 1;
164 const MATCH_TAG_NAME = 2;
165 const MATCH_SHORT_ARG = 3;
166 const MATCH_ARGS = 4;
169 * Argument backreferences
172 const MATCH_ARG_NAME = 1;
173 const MATCH_ARG_VALUE = 2;
/**
* Constructor.
*
* Enables all three parse features and converts the numeric delimiter code
* stored in $delimiter into the corresponding single character.
*/
public function __construct()
{
    $this->flags = self::PARSE_BBCODE | self::PARSE_URLS | self::PARSE_SMILIES;
    $this->delimiter = chr($this->delimiter);
}
/**
* Returns a string ready for storage and/or second_pass.
*
* The text is scanned for BBCode tags, URLs and smilies. Every recognised item
* is replaced by the delimiter character and recorded as a node. The result is
* a serialized array in which even indexes hold text segments and odd indexes
* hold nodes, always starting and ending with a text segment.
*
* @param string $string
* @return string
*/
public function first_pass($string)
{
    $this->stack = array();
    $this->parsed = array();
    $this->parse_pos = 1;

    // Remove the delimiter from the string.
    $string = str_replace($this->delimiter, '', $string);

    // Make a regex out of the following items:
    $regex_parts = array(
        $this->tag_regex,
        $this->url_regex,
    );

    // Only add the smiley alternative when smilies exist. An empty alternative
    // would match the empty string at every offset and be stored as a smiley.
    if (sizeof($this->smilies) > 0)
    {
        $regex_parts[] = implode('|', array_map(array($this, 'regex_quote'), array_keys($this->smilies)));
    }

    $regex = '~' . implode('|', $regex_parts) . '~i';

    // Do most of the job here...
    $result = preg_replace_callback($regex, array($this, 'first_pass_tag_check'), $string);

    if ($result === null)
    {
        // PCRE failed (e.g. backtrack limit). Keep the text unparsed rather than losing it.
        $this->stack = array();
        $this->parsed = array();
    }
    else
    {
        $string = $result;
    }

    // Close all remaining open tags.
    if (sizeof($this->stack) > 0)
    {
        $string .= $this->close_tags($this->stack);
        $this->stack = array();
    }

    // Make a serialized array out of it.
    $pieces = explode($this->delimiter, $string);

    if (sizeof($pieces) > 1)
    {
        // One node was recorded per delimiter, so there is exactly one more
        // text segment than there are nodes. Interleave them and keep the
        // trailing segment, which the original loop dropped.
        $nodes = array_values($this->parsed);
        $parsed = array();

        foreach ($pieces as $index => $text)
        {
            $parsed[] = $text;

            if (isset($nodes[$index]))
            {
                $parsed[] = $nodes[$index];
            }
        }

        $this->parsed = array();
        $this->parse_pos = 1;
    }
    else
    {
        $parsed = $pieces;
    }

    return serialize($parsed);
}
/**
* Opposite function to first_pass.
* Changes the output of first_pass back to BBCode.
*
* Every node (odd index) is turned back into text with decompile_tag();
* the text segments (even indexes) are kept as they are.
*
* @param string $string Serialized array as returned by first_pass
* @return string
* @todo make sure this works after the change of first_pass data storage.
*/
public function first_pass_decompile($string)
{
    $string = unserialize($string);

    if (!is_array($string))
    {
        // Not first_pass output; there is nothing to decompile.
        return '';
    }

    for ($i = 1, $n = sizeof($string); $i < $n; $i += 2)
    {
        // The original passed an undefined variable here instead of the node itself.
        $string[$i] = $this->decompile_tag($string[$i]);
    }

    return implode('', $string);
}
/**
* Removes first_pass data. This removes all BBCode tags. To reverse the effect of first_pass use first_pass_decompile
*
* Only the text segments (even indexes) of the unserialized array are kept.
*
* @param string $string Serialized array as returned by first_pass
* @return string
*/
public function remove_first_pass_data($string)
{
    $decompiled = array();
    $compiled = unserialize($string);

    if (!is_array($compiled))
    {
        // Not first_pass output; there is no text to extract.
        return '';
    }

    // Advance the index, not the bound: the original incremented $n and never terminated.
    for ($i = 0, $n = sizeof($compiled); $i < $n; $i += 2)
    {
        $decompiled[] = $compiled[$i];
    }

    return implode('', $decompiled);
}
/**
* The function takes the result of first_pass and returns the string fully parsed.
*
* Text segments (even indexes) are optionally passed through $text_callback.
* Nodes (odd indexes) are replaced by their output according to the tag
* definitions and the current parse flags. A node whose feature is disabled
* is turned back into its source text.
*
* NOTE(review): the input is unserialize()d, so it must only ever be data
* produced by first_pass and never raw user input.
*
* @param string $string Serialized array as returned by first_pass
* @return string
*/
public function second_pass($string)
{
    $this->stack = array();

    $string = unserialize($string);

    if (!is_array($string))
    {
        // Not first_pass output; there is nothing to render.
        return '';
    }

    // Tags read and write the text segment that follows them ($i + 1).
    // Make sure a trailing node has one.
    if (sizeof($string) % 2 == 0)
    {
        $string[] = '';
    }

    if (!is_null($this->text_callback))
    {
        for ($i = 0, $n = sizeof($string); $i < $n; $i += 2)
        {
            $string[$i] = call_user_func($this->text_callback, $string[$i]);
        }
    }

    for ($i = 1, $n = sizeof($string); $i < $n; $i += 2)
    {
        $tag_data = $string[$i];
        $type = $tag_data[0];
        $tag = $tag_data[1];

        $is_bbcode = ($type != self::TYPE_ABSTRACT_URL && $type != self::TYPE_ABSTRACT_SMILEY);

        // Plain lookup instead of a reference: a reference would create an
        // entry in $this->tags for unknown names, URLs and smilies.
        $tag_definition = ($is_bbcode && isset($this->tags[$tag])) ? $this->tags[$tag] : null;

        // Whether this node was put on the stack in this iteration.
        $pushed = false;

        if (($this->flags & self::PARSE_BBCODE) && $is_bbcode && $type != self::TYPE_CTAG)
        {
            // These apply to opening tags and tags without closing tags.
            $top = isset($this->stack[0]['name']) ? $this->stack[0]['name'] : null;

            // Is the tag still allowed as a child?
            // This is still needed!
            if ($top !== null && isset($this->tags[$top]['close_shadow']) && !is_bool($this->tags[$top]['close']) && !$this->child_allowed($tag))
            {
                // The previous string won't be edited anymore.
                $string[$i - 1] .= $this->tags[$top]['close'];
                array_shift($this->stack);
            }

            // Add tag to stack only if it needs a closing tag.
            if ($tag_definition['close'] !== false || !isset($tag_definition['close_shadow']))
            {
                array_unshift($this->stack, array('name' => $tag, 'attributes' => array()));
                $pushed = true;
            }
        }

        switch ($type)
        {
            case self::TYPE_ABSTRACT_URL:

                if ($this->flags & self::PARSE_URLS && $this->child_allowed('__url'))
                {
                    $string[$i] = '<a href="' . $tag_data[1] . '">' . $tag_data[1] . '</a>';
                }
                else
                {
                    $string[$i] = $tag_data[1];
                }

            break;

            case self::TYPE_ABSTRACT_SMILEY:

                if ($this->flags & self::PARSE_SMILIES && $this->child_allowed('__smiley'))
                {
                    $string[$i] = $this->smilies[$tag_data[1]];
                }
                else
                {
                    $string[$i] = $tag_data[1];
                }

            break;

            case self::TYPE_CTAG:

                if (($this->flags & self::PARSE_BBCODE) == 0)
                {
                    $string[$i] = $this->decompile_tag($string[$i]);
                    break;
                }

                $top = isset($this->stack[0]['name']) ? $this->stack[0]['name'] : null;

                // It must be the last one as tag nesting was checked in the first pass.
                // An exception to this rule was created with adding the new type of tag without closing tag.
                if ($top !== null && isset($this->tags[$top]['close_shadow']))
                {
                    if (!is_bool($this->tags[$top]['close']))
                    {
                        // The previous string won't be edited anymore.
                        $string[$i - 1] .= $this->tags[$top]['close'];
                    }
                    else if (isset($tag_definition['close_func']))
                    {
                        $string[$i - 1] .= call_user_func($tag_definition['close_func'], $this->stack[0]['attributes']);
                    }
                    array_shift($this->stack);
                    $top = isset($this->stack[0]['name']) ? $this->stack[0]['name'] : null;
                }

                if ($top === null || $tag != $top)
                {
                    // No matching open tag: output the closing tag as text.
                    // It is built inline because decompile_tag() only accepts arrays.
                    $string[$i] = '[/' . $tag . ']';
                }
                else if (isset($tag_definition['close_shadow']))
                {
                    $string[$i] = '';
                }
                else if ($tag_definition['close'] !== false || !isset($tag_definition['close_shadow']))
                {
                    if (isset($tag_definition['close_func']))
                    {
                        $string[$i] = call_user_func($tag_definition['close_func'], $this->stack[0]['attributes']);
                    }
                    else
                    {
                        $string[$i] = $tag_definition['close'];
                    }
                    array_shift($this->stack);
                }
                else
                {
                    $string[$i] = '';
                }

            break;

            case self::TYPE_TAG_SIMPLE:

                if (($this->flags & self::PARSE_BBCODE) == 0)
                {
                    $string[$i] = $this->decompile_tag($string[$i]);
                    break;
                }

                if ($tag_definition['children'][0] == false && sizeof($tag_definition['children']) == 1)
                {
                    if (isset($tag_definition['attributes']['__']))
                    {
                        // Only store attributes when this tag owns the top stack entry.
                        // Otherwise they would overwrite those of the parent tag.
                        if ($pushed)
                        {
                            $this->stack[0]['attributes'] = array('__' => $string[$i + 1]);
                        }

                        if (isset($tag_definition['replace_func']))
                        {
                            $string[$i] = call_user_func($tag_definition['replace_func'], array('__' => $string[$i + 1]), $tag_definition);
                        }
                        else
                        {
                            $string[$i] = str_replace('{__}', $string[$i + 1], $tag_definition['replace']);
                        }
                    }
                    else if (isset($tag_definition['replace_func']))
                    {
                        $string[$i] = call_user_func($tag_definition['replace_func'], array(), $tag_definition);
                    }
                    else
                    {
                        $string[$i] = $tag_definition['replace'];
                    }

                    if (isset($tag_definition['content_func']))
                    {
                        $string[$i + 1] = call_user_func($tag_definition['content_func'], $string[$i + 1]);
                    }
                }
                else
                {
                    if (isset($tag_definition['replace_func']))
                    {
                        $string[$i] = call_user_func($tag_definition['replace_func'], array(), $tag_definition);
                    }
                    else
                    {
                        $string[$i] = $tag_definition['replace'];
                    }
                }

                if (!empty($tag_definition['attributes']))
                {
                    // The tag has defined attributes but doesn't use any. The attribute replacements must be removed.
                    $string[$i] = preg_replace('/{[^}]*}/', '', $string[$i]);
                }

            break;

            case self::TYPE_TAG:

                if (($this->flags & self::PARSE_BBCODE) == 0)
                {
                    $string[$i] = $this->decompile_tag($string[$i]);
                    break;
                }

                // These apply to tags with attributes.
                if (!isset($tag_data[2]))
                {
                    $tag_data[2] = array('__' => $string[$i + 1]);
                }

                // Only store attributes when this tag owns the top stack entry.
                if ($pushed)
                {
                    $this->stack[0]['attributes'] = $tag_data[2];
                }

                $content_only = ($tag_definition['children'][0] == false && sizeof($tag_definition['children']) == 1);
                $apply_content_func = (isset($tag_definition['content_func']) && $tag_definition['children'][0] === false && sizeof($tag_definition['children']) == 1);

                // Handle the (opening) tag with a custom function
                if (isset($tag_definition['replace_func']))
                {
                    $string[$i] = call_user_func($tag_definition['replace_func'], $tag_data[2], $tag_definition);

                    if ($apply_content_func)
                    {
                        $string[$i + 1] = call_user_func($tag_definition['content_func'], $string[$i + 1]);
                    }
                    break;
                }

                // Tags without children may use their content as the "{__}" placeholder.
                if (isset($tag_definition['attributes']['__']) && $content_only)
                {
                    $attributes = array('{__}');
                    $replacements = array($string[$i + 1]);
                }
                else
                {
                    $attributes = array();
                    $replacements = array();
                }

                foreach ($tag_definition['attributes'] as $attribute => $value)
                {
                    $attributes[] = '{' . $attribute . '}';
                    if (!isset($tag_data[2][$attribute]))
                    {
                        if (isset($value['required']))
                        {
                            // A required attribute is missing: output the tag as text and
                            // leave both the foreach and the switch.
                            $string[$i] = $this->decompile_tag($tag_data);
                            break 2;
                        }
                        $replacements[] = '';
                        continue;
                    }

                    $replacements[] = str_replace($this->attr_value_replace, $tag_data[2][$attribute], $value['replace']);
                }

                $string[$i] = str_replace($attributes, $replacements, $tag_definition['replace']);

                // It has to be twice... this should not be used if required attributes are missing.
                if ($apply_content_func)
                {
                    $string[$i + 1] = call_user_func($tag_definition['content_func'], $string[$i + 1]);
                }

            break;
        }
    }

    return implode('', $string);
}
/**
* Callback for preg_replace_callback in first_pass.
*
* The kind of match is told apart by the number of captured groups:
* 1 entry is a smiley, 6 entries is a URL, anything else is a BBCode tag.
* Accepted items are recorded in $parsed and replaced by the delimiter.
* Rejected tags are returned unchanged so they stay in the text.
*
* @param array $matches
* @return string
*/
private function first_pass_tag_check($matches)
{
    switch (sizeof($matches))
    {
        // Smilies
        case 1:
            $this->parsed[$this->parse_pos] = array(self::TYPE_ABSTRACT_SMILEY, $matches[0]);
            $this->parse_pos += 2;
            return $this->delimiter;

        // URL
        case 6:
            $this->parsed[$this->parse_pos] = array(self::TYPE_ABSTRACT_URL, $matches[0]);
            $this->parse_pos += 2;
            return $this->delimiter;
    }

    $name = $matches[self::MATCH_TAG_NAME];

    if (!isset($this->tags[$name]))
    {
        // Tag with the given name not defined.
        return $matches[0];
    }

    // Does the innermost open tag forbid children altogether?
    $top = sizeof($this->stack) ? $this->stack[0] : null;
    $top_children = ($top !== null && isset($this->tags[$top]['children'])) ? $this->tags[$top]['children'] : null;
    $top_is_leaf = (is_array($top_children) && sizeof($top_children) == 1 && empty($top_children[0]));

    // If tag is a closing tag.
    if (strlen($matches[self::MATCH_CLOSING_TAG]) != 0)
    {
        $valid = array_search($name, $this->stack, true);

        if ($valid === false)
        {
            // Closing tag without open tag.
            return $matches[0];
        }

        if ($valid === 0)
        {
            // Closes the innermost open tag.
            array_shift($this->stack);
            $this->parsed[$this->parse_pos] = array(self::TYPE_CTAG, $name);
            $this->parse_pos += 2;
            return $this->delimiter;
        }

        if ($top_is_leaf)
        {
            // Tag does not allow children.
            // Do not handle other closing tags here as they are invalid in tags which do not allow children.
            return $matches[0];
        }

        // Now we have to close all tags that were opened before this closing tag.
        // We know that this tag does not close the last opened tag.
        $to_close = array_splice($this->stack, 0, $valid + 1);
        return $this->close_tags($to_close);
    }

    // The tag is an opening tag.
    if ($top !== null)
    {
        if ($top_is_leaf)
        {
            // Tag does not allow children.
            return $matches[0];
        }

        // Tag parent not allowed for this tag. Omit here.
        if (!$this->parent_allowed($name, $top))
        {
            if (isset($this->tags[$top]['close_shadow']))
            {
                array_shift($this->stack);
            }
            else
            {
                return $matches[0];
            }
        }
    }
    // Is tag allowed in global scope?
    else if (!$this->parent_allowed($name, '__global'))
    {
        return $matches[0];
    }

    $definition = $this->tags[$name];

    // Copy the definitions instead of referencing them, so that looking up
    // an unknown attribute cannot add entries to the tag definition.
    $tag_attributes = (isset($definition['attributes']) && is_array($definition['attributes'])) ? $definition['attributes'] : array();

    $attributes = array();
    $has_short = (strlen($matches[self::MATCH_SHORT_ARG]) != 0 && isset($tag_attributes['_']));

    if ($has_short)
    {
        // Add short attribute.
        $attributes['_'] = substr($matches[self::MATCH_SHORT_ARG], 1, -1);
    }

    if ($has_short || (strlen($matches[self::MATCH_ARGS]) != 0 && sizeof($tag_attributes) != 0))
    {
        // Analyzer...
        $matched_attrs = array();

        preg_match_all($this->attribute_regex, $matches[self::MATCH_ARGS], $matched_attrs, PREG_SET_ORDER);

        foreach ($matched_attrs as $value)
        {
            $attr_name = $value[self::MATCH_ARG_NAME];

            if (isset($attributes[$attr_name]))
            {
                // This prevents adding the same attribute more than once.
                continue;
            }

            if (!isset($tag_attributes[$attr_name]))
            {
                // The attribute is not defined for this tag. Undefined attributes are removed.
                continue;
            }

            $attr_definition = $tag_attributes[$attr_name];

            // An attribute given without a value has no value group in the match.
            $attr_value = isset($value[self::MATCH_ARG_VALUE]) ? (string) substr($value[self::MATCH_ARG_VALUE], 1, -1) : '';

            if (isset($attr_definition['type_check']))
            {
                // A type check is needed for this attribute.
                $type_check = call_user_func($attr_definition['type_check'], $attr_value);

                if (!is_bool($type_check))
                {
                    // The type check function decided to fix the input instead of returning false.
                    $attr_value = $type_check;
                }
                else if ($type_check === false)
                {
                    // Type check has failed.
                    continue;
                }
            }

            if (isset($attr_definition['required']) && strlen($attr_value) == 0)
            {
                // A required attribute is empty. This is done after the type check as the type check may return an empty value.
                return $matches[0];
            }

            $attributes[$attr_name] = $attr_value;
        }
    }

    // Check all attributes, which were not used, if they are required.
    if ($this->has_required($name, array_values(array_diff(array_keys($tag_attributes), array_keys($attributes)))))
    {
        // Not all required attributes were used.
        return $matches[0];
    }

    // The tag is accepted. Only now put it on the stack, so that a rejected
    // tag never stays there and never receives a closing node.
    if ($definition['close'] !== false || !isset($definition['close_shadow']))
    {
        // Do not add tags to stack that do not need closing tags.
        array_unshift($this->stack, $name);
    }

    if (sizeof($attributes))
    {
        $this->parsed[$this->parse_pos] = array(self::TYPE_TAG, $name, $attributes);
    }
    else
    {
        $this->parsed[$this->parse_pos] = array(self::TYPE_TAG_SIMPLE, $name);
    }

    $this->parse_pos += 2;
    return $this->delimiter;
}
/**
* Records a closing-tag node for each tag in $tags, in the order given,
* and returns one delimiter per recorded node.
*
* Tags whose definition has 'close_shadow' set are skipped.
*
* @param array $tags Tag names
* @return string
*/
private function close_tags($tags)
{
    $delimiters = '';

    foreach ($tags as $name)
    {
        // @todo: Is this needed?
        if (isset($this->tags[$name]['close_shadow']))
        {
            continue;
        }

        $this->parsed[$this->parse_pos] = array(self::TYPE_CTAG, $name);
        $this->parse_pos += 2;
        $delimiters .= $this->delimiter;
    }

    return $delimiters;
}
/**
* Returns the tag to the form it had before the first_pass
*
* $tag is a node as stored by first_pass: index 0 is the node type,
* index 1 the tag name (or the matched text for URLs and smilies) and
* the optional index 2 an array of attributes.
*
* @param array $tag
* @return string
*/
private function decompile_tag(array $tag)
{
    // URLs and smilies were stored as their matched text; they are not tags.
    if ($tag[0] == self::TYPE_ABSTRACT_URL || $tag[0] == self::TYPE_ABSTRACT_SMILEY)
    {
        return $tag[1];
    }

    // All type constants are truthy, so the closing tag type must be checked
    // explicitly. A falsy type is still treated as closing, as before.
    $is_closing = (!$tag[0] || $tag[0] == self::TYPE_CTAG);

    $ret = '[' . ($is_closing ? '/' : '');
    $ret .= $tag[1];

    if (isset($tag[2]) && is_array($tag[2]))
    {
        if (isset($tag[2]['_']))
        {
            $ret .= '="' . $tag[2]['_'] . '"';
            unset($tag[2]['_']);
        }

        foreach ($tag[2] as $attribute => $value)
        {
            if ($attribute === '__')
            {
                // "__" holds the tag content and was never written as an attribute.
                continue;
            }

            $ret .= ' ' . $attribute . '="' . $value . '"';
        }
    }
    $ret .= ']';

    return $ret;
}
/**
* Checks if $tag can be a child of the tag in stack index $index
*
* The 'children' list of the stack entry's tag is consulted: entry 0 is the
* default (true allows everything) and a listed tag is an exception to that
* default. When there is no stack entry at $index, every tag is allowed.
*
* @param string $tag
* @param int $index = 0
* @return bool
*/
private function child_allowed($tag, $index = 0)
{
    if (!isset($this->stack[$index]))
    {
        return true;
    }

    $parent = isset($this->stack[$index]['name']) ? $this->stack[$index]['name'] : null;

    // Read-only lookup: taking a reference here would create missing
    // entries in $this->tags as a side effect.
    $children = ($parent !== null && isset($this->tags[$parent]['children'])) ? $this->tags[$parent]['children'] : array();
    $default = isset($children[0]) ? (bool) $children[0] : false;

    // Being listed inverts the default.
    return (isset($children[$tag]) xor $default);
}
/**
* Checks if the $tag can be a child of $parent
*
* The 'parents' list of $tag is consulted: entry 0 is the default (true
* allows every parent) and a listed parent is an exception to that default.
*
* @param string $tag
* @param string $parent
* @return bool
*/
private function parent_allowed($tag, $parent)
{
    // Read-only lookup: taking a reference here would create a missing
    // 'parents' entry in the tag definition as a side effect.
    $parents = isset($this->tags[$tag]['parents']) ? $this->tags[$tag]['parents'] : array();
    $default = isset($parents[0]) ? (bool) $parents[0] : false;

    // Being listed inverts the default.
    return (isset($parents[$parent]) xor $default);
}
/**
* Tells whether at least one of the listed attributes is marked 'required'
* in the definition of $tag.
*
* @param string $tag
* @param array $attributes Attribute names, indexed from 0
* @return bool
*/
private function has_required($tag, $attributes)
{
    $total = sizeof($attributes);
    $position = 0;

    while ($position < $total)
    {
        $attribute = $attributes[$position];

        if (isset($this->tags[$tag]['attributes'][$attribute]['required']))
        {
            return true;
        }

        ++$position;
    }

    return false;
}
/**
* Escapes $var for use inside a regular expression delimited by "~".
*
* @param string $var
* @return string
*/
private function regex_quote($var)
{
    $quoted = preg_quote($var, '~');

    return $quoted;
}
/**
* Sets the parse flags, cast to an integer.
*
* @param int $flags Combination of the PARSE_* constants
*/
public function set_flags($flags)
{
    $flags = (int) $flags;
    $this->flags = $flags;
}