Rewrite CSS url() and font-family output logic.
[htmlpurifier.git] / library / HTMLPurifier / AttrDef.php
blobb2e4f36c5df40bdd12c93930c06c93a964aa5101
1 <?php
/**
 * Base class for all validating attribute definitions.
 *
 * This family of classes forms the core for not only HTML attribute validation,
 * but also any sort of string that needs to be validated or cleaned (which
 * means CSS properties and composite definitions are defined here too).
 * Besides defining (through code) what precisely makes the string valid,
 * subclasses are also responsible for cleaning the code if possible.
 */
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized. Has no
     * meaning in other contexts.
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required. Has no
     * meaning in other contexts.
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * @param $string String to be validated and cleaned.
     * @param $config Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory HTMLPurifier_AttrContext object.
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, carriage
     * return and tab is replaced with a single space. While most useful
     * for HTML attributes specified as CDATA, it can also be applied to
     * most CSS values.
     *
     * @note The spec asks for line feeds to be ignored; this implementation
     *       replaces them with a space instead, exactly like carriage
     *       returns and tabs.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7. However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param $string String to normalize.
     * @return Trimmed string with inner newlines, tabs and carriage returns
     *         turned into spaces.
     */
    public function parseCDATA($string) {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     *
     * @param $string String construction info
     * @return Created AttrDef object corresponding to $string
     */
    public function make($string) {
        // Default implementation: return a flyweight of this object.
        // If $string has an effect on the returned object (i.e. you
        // need to overload this method), it is best to clone or
        // instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Only whitespace around the two commas is collapsed, and only when all
     * three components are plain runs of digits.
     *
     * @param $string CSS text that may contain rgb() triples.
     * @return The text with every matching triple rewritten as rgb(r,g,b).
     */
    protected function mungeRgb($string) {
        return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handled forms:
     *  - backslash + 1 to 6 hex digits: replaced by the corresponding
     *    character; one whitespace character directly after the digits is
     *    treated as the terminator and consumed;
     *  - backslash + newline: both are removed;
     *  - backslash + any other character: that character is kept literally;
     *  - a lone backslash at the very end of the string is kept as is.
     *
     * A hex escape whose character HTMLPurifier_Encoder::cleanUTF8() reduces
     * to an empty string is dropped, but the text following it is preserved.
     *
     * @param $string CSS string, possibly containing backslash escapes.
     * @return The string with escapes expanded.
     */
    protected function expandCSSEscape($string) {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Nothing follows the backslash: emit it verbatim.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Collect at most six hex digits.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // $i now points just past the digits. A whitespace
                    // character there terminates the escape and is
                    // swallowed by the loop's $i++; anything else must be
                    // re-read, so step back to cancel that increment.
                    // This has to happen before the validity check below,
                    // otherwise a rejected escape would eat the character
                    // that follows it.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Escaped newline: drop both characters.
                    continue;
                }
                // Any other escaped character falls through and is
                // appended literally below.
            }
            $ret .= $string[$i];
        }
        return $ret;
    }

}
123 // vim: et sw=4 sts=4