Don't unset parser variable; plays poorly with serialize.
[htmlpurifier.git] / library / HTMLPurifier / Config.php
blobcef413066f0c2a19505369e61d7c82ad418eed18
1 <?php
/**
 * Configuration object that triggers customizable behavior.
 *
 * @warning This class is strongly defined: that means that the class
 *          will fail if an undefined directive is retrieved or set.
 *
 * @note Many classes that could (although many times don't) use the
 *       configuration object make it a mandatory parameter.  This is
 *       because a configuration object should always be forwarded,
 *       otherwise, you run the risk of missing a parameter and then
 *       being stumped when a configuration directive doesn't work.
 *
 * @todo Reconsider some of the public member variables
 */
17 class HTMLPurifier_Config
/**
 * HTML Purifier's version
 */
public $version = '4.3.0';

/**
 * Bool indicator whether or not to automatically finalize
 * the object if a read operation is done
 */
public $autoFinalize = true;

// protected member variables

/**
 * Namespace indexed array of serials for specific namespaces (see
 * getBatchSerial() for more info).
 */
protected $serials = array();

/**
 * Serial for entire configuration object (see getSerial())
 */
protected $serial;

/**
 * Parser for variables
 */
protected $parser = null;

/**
 * Reference HTMLPurifier_ConfigSchema for value checking
 * @note This is public for introspective purposes. Please don't
 *       abuse!
 */
public $def;

/**
 * Indexed array of definitions
 */
protected $definitions;

/**
 * Bool indicator whether or not config is finalized
 */
protected $finalized = false;

/**
 * Property list containing configuration directives.
 */
protected $plist;

/**
 * Whether or not a set is taking place due to an
 * alias lookup.
 */
private $aliasMode;

/**
 * Set to false if you do not want line and file numbers in errors
 * (useful when unit testing). This will also compress some errors
 * and exceptions.
 */
public $chatty = true;

/**
 * Current lock; only gets to this namespace are allowed.
 */
private $lock;
/**
 * Builds a configuration object on top of a schema.
 *
 * @param $definition HTMLPurifier_ConfigSchema that defines what directives
 *                    are allowed.
 * @param $parent     Optional parent property list to inherit values from;
 *                    when omitted (or otherwise falsy) the schema's
 *                    defaultPlist is used instead.
 */
public function __construct($definition, $parent = null) {
    if (!$parent) {
        $parent = $definition->defaultPlist;
    }
    // keep the schema around for checking directives later on
    $this->def = $definition;
    $this->plist = new HTMLPurifier_PropertyList($parent);
    $this->parser = new HTMLPurifier_VarParser_Flexible();
}
/**
 * Convenience constructor that creates a config object based on a mixed var
 *
 * @param mixed $config Variable that defines the state of the config
 *                      object. Can be: a HTMLPurifier_Config() object
 *                      (returned as-is), an array of directives as
 *                      accepted by loadArray(), or a string filename of
 *                      an ini file as accepted by loadIni(). Any other
 *                      value yields an unmodified config.
 * @param HTMLPurifier_ConfigSchema $schema Schema object; when falsy the
 *                      config is built through createDefault().
 * @return Configured HTMLPurifier_Config object
 */
public static function create($config, $schema = null) {
    if ($config instanceof HTMLPurifier_Config) {
        // already a config object: pass it straight through
        return $config;
    }
    $ret = $schema
        ? new HTMLPurifier_Config($schema)
        : HTMLPurifier_Config::createDefault();
    if (is_string($config)) {
        $ret->loadIni($config);
    } elseif (is_array($config)) {
        $ret->loadArray($config);
    }
    return $ret;
}
/**
 * Creates a new config object that inherits from a previous one.
 *
 * @param HTMLPurifier_Config $config Configuration object to inherit
 *                                    from.
 * @return HTMLPurifier_Config object built with $config's schema and with
 *         $config's property list as its parent.
 */
public static function inherit(HTMLPurifier_Config $config) {
    $child = new HTMLPurifier_Config($config->def, $config->plist);
    return $child;
}
/**
 * Convenience constructor that creates a default configuration object.
 *
 * @return HTMLPurifier_Config built on the schema returned by
 *         HTMLPurifier_ConfigSchema::instance().
 */
public static function createDefault() {
    return new HTMLPurifier_Config(HTMLPurifier_ConfigSchema::instance());
}
/**
 * Retrieves a value from the configuration.
 *
 * @param $key String key, in "Namespace.Directive" form
 * @param $a   Deprecated: directive name for the legacy two-argument
 *             form, in which $key holds only the namespace.
 * @return Value stored in the property list for the directive. Nothing
 *         (null) is returned, after an error has been triggered, when the
 *         directive is undefined, is an alias, or belongs to a namespace
 *         other than the one currently locked.
 */
public function get($key, $a = null) {
    if ($a !== null) {
        // legacy form: warn first (the message uses the separate parts),
        // then join them into the real key
        $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
        $key = "$key.$a";
    }
    if (!$this->finalized) {
        $this->autoFinalize();
    }
    if (!isset($this->def->info[$key])) {
        // can't add % due to SimpleTest bug
        $this->triggerError(
            'Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
            E_USER_WARNING
        );
        return null;
    }
    $directive = $this->def->info[$key];
    if (isset($directive->isAlias)) {
        $this->triggerError(
            'Cannot get value from aliased directive, use real name ' . $directive->key,
            E_USER_ERROR
        );
        return null;
    }
    if ($this->lock) {
        // while a lock is active only its own namespace may be read
        $segments = explode('.', $key);
        $ns = $segments[0];
        if ($ns !== $this->lock) {
            $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
            return null;
        }
    }
    return $this->plist->get($key);
}
/**
 * Retrieves an array of directives to values from a given namespace
 *
 * @param $namespace String namespace
 * @return Array of directive => value for that namespace as produced by
 *         getAll(); nothing (null) after a warning has been triggered if
 *         the namespace is not present.
 */
public function getBatch($namespace) {
    if (!$this->finalized) {
        $this->autoFinalize();
    }
    $everything = $this->getAll();
    if (isset($everything[$namespace])) {
        return $everything[$namespace];
    }
    $this->triggerError(
        'Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
        E_USER_WARNING
    );
    return null;
}
/**
 * Returns a md5 signature of a segment of the configuration object
 * that uniquely identifies that particular configuration
 *
 * @note Revision is handled specially and is removed from the batch
 *       before processing!
 * @note The signature is cached per namespace in $this->serials and is
 *       only recomputed while that cache entry is empty.
 * @param $namespace Namespace to get serial for
 * @return String md5 of the serialized namespace batch
 */
public function getBatchSerial($namespace) {
    if (!empty($this->serials[$namespace])) {
        return $this->serials[$namespace];
    }
    $batch = $this->getBatch($namespace);
    unset($batch['DefinitionRev']);
    $signature = md5(serialize($batch));
    $this->serials[$namespace] = $signature;
    return $signature;
}
/**
 * Returns a md5 signature for the entire configuration object
 * that uniquely identifies that particular configuration
 *
 * @note The signature is cached in $this->serial after the first call.
 * @return String md5 of the serialized result of getAll()
 */
public function getSerial() {
    if (!empty($this->serial)) {
        return $this->serial;
    }
    $signature = md5(serialize($this->getAll()));
    $this->serial = $signature;
    return $signature;
}
/**
 * Retrieves all directives, organized by namespace
 *
 * @warning This is a pretty inefficient function, avoid if you can
 * @return Two-level array: namespace => directive => value, built by
 *         splitting each squashed property-list key at its first dot.
 */
public function getAll() {
    if (!$this->finalized) {
        $this->autoFinalize();
    }
    $grouped = array();
    foreach ($this->plist->squash() as $name => $value) {
        // limit of 2: everything after the first dot is the directive
        $parts = explode('.', $name, 2);
        $grouped[$parts[0]][$parts[1]] = $value;
    }
    return $grouped;
}
/**
 * Sets a value to configuration.
 *
 * The value is run through the variable parser for the directive's
 * declared type, value aliases are resolved, and (when the directive
 * declares an allowed list) membership is checked before the value is
 * stored. On any failure an error is triggered and nothing is stored.
 *
 * @param $key   String key, in "Namespace.Directive" form. If it contains
 *               no dot, the deprecated set($namespace, $directive, $value)
 *               calling convention is assumed.
 * @param $value Mixed value (the directive name in the deprecated form)
 * @param $a     Value, only used by the deprecated three-argument form
 */
public function set($key, $value, $a = null) {
    if (strpos($key, '.') === false) {
        // deprecated form: shift the arguments into their modern roles
        $namespace = $key;
        $directive = $value;
        $value = $a;
        $key = "$key.$directive";
        $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
    } else {
        list($namespace) = explode('.', $key);
    }
    if ($this->isFinalized('Cannot set directive after finalization')) return;
    if (!isset($this->def->info[$key])) {
        $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
            E_USER_WARNING);
        return;
    }
    $def = $this->def->info[$key];

    if (isset($def->isAlias)) {
        if ($this->aliasMode) {
            // an alias resolved to another alias
            $this->triggerError('Double-aliases not allowed, please fix '.
                'ConfigSchema bug with ' . $key, E_USER_ERROR);
            return;
        }
        // aliasMode flags the nested call so a second alias hop is caught
        $this->aliasMode = true;
        $this->set($def->key, $value);
        $this->aliasMode = false;
        $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
        return;
    }

    // Raw type might be negative when using the fully optimized form
    // of stdclass, which indicates allow_null == true
    $rtype = is_int($def) ? $def : $def->type;
    if ($rtype < 0) {
        $type = -$rtype;
        $allow_null = true;
    } else {
        $type = $rtype;
        $allow_null = isset($def->allow_null);
    }

    try {
        $value = $this->parser->parse($value, $type, $allow_null);
    } catch (HTMLPurifier_VarParserException $e) {
        $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
        return;
    }
    if (is_string($value) && is_object($def)) {
        // resolve value alias if defined
        if (isset($def->aliases[$value])) {
            $value = $def->aliases[$value];
        }
        // check to see if the value is allowed
        if (isset($def->allowed) && !isset($def->allowed[$value])) {
            $this->triggerError('Value not supported, valid values are: ' .
                $this->_listify($def->allowed), E_USER_WARNING);
            return;
        }
    }
    $this->plist->set($key, $value);

    // reset definitions if the directives they depend on changed
    // this is a very costly process, so it's discouraged
    // with finalization
    if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
        $this->definitions[$namespace] = null;
    }

    // invalidate the cached signatures: the namespace one read by
    // getBatchSerial() and the whole-object one read by getSerial(),
    // which would otherwise keep describing the previous values
    $this->serials[$namespace] = false;
    $this->serial = null;
}
/**
 * Convenience function for error reporting
 *
 * @param $lookup Array whose keys are the names to list
 * @return String of the keys of $lookup joined with ", "
 */
private function _listify($lookup) {
    return implode(', ', array_keys($lookup));
}
/**
 * Retrieves object reference to the HTML definition.
 *
 * Shorthand for getDefinition('HTML', $raw, $optimized).
 *
 * @param $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary.  Consider using
 *             maybeGetRawHTMLDefinition, which is more explicitly
 *             named, instead.
 */
public function getHTMLDefinition($raw = false, $optimized = false) {
    $definition = $this->getDefinition('HTML', $raw, $optimized);
    return $definition;
}
/**
 * Retrieves object reference to the CSS definition
 *
 * Shorthand for getDefinition('CSS', $raw, $optimized).
 *
 * @param $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary.  Consider using
 *             maybeGetRawCSSDefinition, which is more explicitly
 *             named, instead.
 */
public function getCSSDefinition($raw = false, $optimized = false) {
    $definition = $this->getDefinition('CSS', $raw, $optimized);
    return $definition;
}
/**
 * Retrieves object reference to the URI definition
 *
 * Shorthand for getDefinition('URI', $raw, $optimized).
 *
 * @param $raw Return a copy that has not been setup yet. Must be
 *             called before it's been setup, otherwise won't work.
 * @param $optimized If true, this method may return null, to
 *             indicate that a cached version of the modified
 *             definition object is available and no further edits
 *             are necessary.  Consider using
 *             maybeGetRawURIDefinition, which is more explicitly
 *             named, instead.
 */
public function getURIDefinition($raw = false, $optimized = false) {
    $definition = $this->getDefinition('URI', $raw, $optimized);
    return $definition;
}
/**
 * Retrieves a definition
 *
 * In full mode ($raw false) the definition is looked up in memory, then
 * in the definition cache, and is otherwise created, set up and added to
 * the cache. In raw mode a definition that has not been set up is handed
 * out so that the caller can edit it.
 *
 * @param $type Type of definition: HTML, CSS, etc
 * @param $raw  Whether or not definition should be returned raw
 * @param $optimized Only has an effect when $raw is true.  Whether
 *        or not to return null if the result is already present in
 *        the cache.  This is off by default for backwards
 *        compatibility reasons, but you need to do things this
 *        way in order to ensure that caching is done properly.
 *        Check out enduser-customize.html for more details.
 *        We probably won't ever change this default, as much as the
 *        maybe semantics is the "right thing to do."
 * @return The definition object; in raw optimized mode, null when a
 *         finished definition was already in memory or in the cache.
 * @throws HTMLPurifier_Exception if $optimized is set without $raw, if
 *         a raw optimized definition is requested without
 *         %$type.DefinitionID, if a raw definition is requested after
 *         setup, or if optimized and unoptimized raw retrievals are mixed.
 */
public function getDefinition($type, $raw = false, $optimized = false) {
    if ($optimized && !$raw) {
        throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
    }
    if (!$this->finalized) $this->autoFinalize();
    // temporarily suspend locks, so we can handle recursive definition calls
    $lock = $this->lock;
    $this->lock = null;
    $factory = HTMLPurifier_DefinitionCacheFactory::instance();
    $cache = $factory->create($type, $this);
    $this->lock = $lock;
    if (!$raw) {
        // full definition
        // ---------------
        // check if definition is in memory
        if (!empty($this->definitions[$type])) {
            $def = $this->definitions[$type];
            // check if the definition is setup
            if ($def->setup) {
                return $def;
            } else {
                // in memory but not yet set up: finish it now, and only
                // cache it if it is flagged as optimized
                $def->setup($this);
                if ($def->optimized) $cache->add($def, $this);
                return $def;
            }
        }
        // check if definition is in cache
        $def = $cache->get($this);
        if ($def) {
            // definition in cache, save to memory and return it
            $this->definitions[$type] = $def;
            return $def;
        }
        // initialize it
        $def = $this->initDefinition($type);
        // set it up; while the lock is held, get() only accepts
        // directives from the $type namespace
        $this->lock = $type;
        $def->setup($this);
        // NOTE(review): the lock is cleared here rather than restored to
        // the $lock value saved above -- confirm that is intended for
        // nested definition retrievals
        $this->lock = null;
        // save in cache
        $cache->add($def, $this);
        // return it
        return $def;
    } else {
        // raw definition
        // --------------
        // check preconditions
        $def = null;
        if ($optimized) {
            if (is_null($this->get($type . '.DefinitionID'))) {
                // fatally error out if definition ID not set
                throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
            }
        }
        if (!empty($this->definitions[$type])) {
            $def = $this->definitions[$type];
            if ($def->setup && !$optimized) {
                $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
                throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
            }
            if ($def->optimized === null) {
                $extra = $this->chatty ? " (try flushing your cache)" : "";
                throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
            }
            if ($def->optimized !== $optimized) {
                $msg = $optimized ? "optimized" : "unoptimized";
                $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
                throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
            }
        }
        // check if definition was in memory
        if ($def) {
            if ($def->setup) {
                // invariant: $optimized === true (checked above)
                return null;
            } else {
                return $def;
            }
        }
        // if optimized, check if definition was in cache
        // (because we do the memory check first, this formulation
        // is prone to cache slamming, but I think
        // guaranteeing that either /all/ of the raw
        // setup code or /none/ of it is run is more important.)
        if ($optimized) {
            // This code path only gets run once; once we put
            // something in $definitions (which is guaranteed by the
            // trailing code), we always short-circuit above.
            $def = $cache->get($this);
            if ($def) {
                // save the full definition for later, but don't
                // return it yet
                $this->definitions[$type] = $def;
                return null;
            }
        }
        // check invariants for creation
        if (!$optimized) {
            // a DefinitionID only makes sense with optimized retrieval,
            // so warn when one is set on the unoptimized path
            if (!is_null($this->get($type . '.DefinitionID'))) {
                if ($this->chatty) {
                    $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached.  If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration.  Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary).  See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
                } else {
                    $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
                }
            }
        }
        // initialize it
        $def = $this->initDefinition($type);
        // record which retrieval style created it, so later calls can
        // detect inconsistent use (see the checks above)
        $def->optimized = $optimized;
        return $def;
    }
    throw new HTMLPurifier_Exception("The impossible happened!");
}
/**
 * Creates a fresh definition object of the requested type and stores it
 * in $this->definitions.
 *
 * @param $type Type of definition: 'HTML', 'CSS' or 'URI'
 * @return The newly created definition object
 * @throws HTMLPurifier_Exception if $type is none of the supported types
 */
private function initDefinition($type) {
    // quick checks failed, let's create the object
    switch ($type) {
        case 'HTML':
            $def = new HTMLPurifier_HTMLDefinition();
            break;
        case 'CSS':
            $def = new HTMLPurifier_CSSDefinition();
            break;
        case 'URI':
            $def = new HTMLPurifier_URIDefinition();
            break;
        default:
            throw new HTMLPurifier_Exception("Definition of $type type not supported");
    }
    $this->definitions[$type] = $def;
    return $def;
}
/**
 * Retrieves a raw definition in optimized mode.
 *
 * Shorthand for getDefinition($name, true, true).
 *
 * @param $name Type of definition: HTML, CSS, etc
 */
public function maybeGetRawDefinition($name) {
    $raw = true;
    $optimized = true;
    return $this->getDefinition($name, $raw, $optimized);
}
/**
 * Retrieves the raw HTML definition in optimized mode.
 *
 * Shorthand for getDefinition('HTML', true, true).
 */
public function maybeGetRawHTMLDefinition() {
    $raw = true;
    $optimized = true;
    return $this->getDefinition('HTML', $raw, $optimized);
}
/**
 * Retrieves the raw CSS definition in optimized mode.
 *
 * Shorthand for getDefinition('CSS', true, true).
 */
public function maybeGetRawCSSDefinition() {
    $raw = true;
    $optimized = true;
    return $this->getDefinition('CSS', $raw, $optimized);
}
/**
 * Retrieves the raw URI definition in optimized mode.
 *
 * Shorthand for getDefinition('URI', true, true).
 */
public function maybeGetRawURIDefinition() {
    $raw = true;
    $optimized = true;
    return $this->getDefinition('URI', $raw, $optimized);
}
/**
 * Loads configuration values from an array with the following structure:
 * Namespace.Directive => Value
 *
 * Underscores in keys are converted to dots first. A key that still has
 * no dot is taken to be a namespace whose value is an array of
 * Directive => Value pairs.
 *
 * @param $config_array Configuration associative array
 */
public function loadArray($config_array) {
    if ($this->isFinalized('Cannot load directives after finalization')) {
        return;
    }
    foreach ($config_array as $key => $value) {
        $key = str_replace('_', '.', $key);
        if (strpos($key, '.') === false) {
            // bare namespace: its value holds the directives
            foreach ($value as $directive => $directive_value) {
                $this->set($key . '.' . $directive, $directive_value);
            }
            continue;
        }
        $this->set($key, $value);
    }
}
/**
 * Returns a list of array(namespace, directive) for all directives
 * that are allowed in a web-form context as per an allowed
 * namespaces/directives list.
 *
 * @param $allowed List of allowed namespaces/directives: true to allow
 *                 everything, a single string, or an array of strings.
 *                 An entry containing a dot names a directive (a leading
 *                 "-" blacklists it instead); an entry without a dot
 *                 names a whole namespace.
 * @param $schema  Schema whose ->info is enumerated; when falsy,
 *                 HTMLPurifier_ConfigSchema::instance() is used.
 * @return List of array(namespace, directive) pairs. Aliases and the
 *         DefinitionID / DefinitionRev directives are always left out.
 */
public static function getAllowedDirectivesForForm($allowed, $schema = null) {
    if (!$schema) {
        $schema = HTMLPurifier_ConfigSchema::instance();
    }
    $filtered = ($allowed !== true);
    $allowed_ns = array();
    $allowed_directives = array();
    $blacklisted_directives = array();
    if ($filtered) {
        if (is_string($allowed)) {
            $allowed = array($allowed);
        }
        foreach ($allowed as $entry) {
            if (strpos($entry, '.') === false) {
                // namespace
                $allowed_ns[$entry] = true;
            } elseif ($entry[0] == '-') {
                // blacklisted directive: store it without the dash
                $blacklisted_directives[substr($entry, 1)] = true;
            } else {
                // directive
                $allowed_directives[$entry] = true;
            }
        }
    }
    $ret = array();
    foreach ($schema->info as $key => $def) {
        list($ns, $directive) = explode('.', $key, 2);
        if ($filtered) {
            $full = "$ns.$directive";
            if (isset($blacklisted_directives[$full])) {
                continue;
            }
            if (!(isset($allowed_directives[$full]) || isset($allowed_ns[$ns]))) {
                continue;
            }
        }
        if (isset($def->isAlias)) {
            continue;
        }
        if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') {
            continue;
        }
        $ret[] = array($ns, $directive);
    }
    return $ret;
}
/**
 * Loads configuration values from $_GET/$_POST that were posted
 * via ConfigForm
 *
 * @param $array   $_GET or $_POST array to import
 * @param $index   Index/name that the config variables are in
 * @param $allowed List of allowed namespaces/directives
 * @param $mq_fix  Boolean whether or not to enable magic quotes fix
 * @param $schema  Instance of HTMLPurifier_ConfigSchema to use, if not global copy
 * @return HTMLPurifier_Config created from the prepared form values
 */
public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
    return HTMLPurifier_Config::create(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema),
        $schema
    );
}
/**
 * Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
 *
 * @note Same parameters as loadArrayFromForm, except that this object's
 *       own schema ($this->def) is always used.
 */
public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
    $this->loadArray(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def)
    );
}
/**
 * Prepares an array from a form into something usable for the more
 * strict parts of HTMLPurifier_Config
 *
 * @param $array   Submitted form array, keyed by "Namespace.Directive";
 *                 a non-empty "Null_Namespace.Directive" entry sets that
 *                 directive to null.
 * @param $index   Index/name that the config variables are in, or false
 *                 to read $array directly. If the index is missing or is
 *                 not an array, an empty array is used.
 * @param $allowed List of allowed namespaces/directives
 * @param $mq_fix  Boolean whether or not to enable magic quotes fix
 * @param $schema  Schema passed on to getAllowedDirectivesForForm()
 * @return Two-level array: namespace => directive => value
 */
public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
    if ($index !== false) {
        if (isset($array[$index]) && is_array($array[$index])) {
            $array = $array[$index];
        } else {
            $array = array();
        }
    }
    $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();

    $ret = array();
    foreach (HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema) as $pair) {
        list($ns, $directive) = $pair;
        $skey = "$ns.$directive";
        if (!empty($array["Null_$skey"])) {
            // the null marker wins over any submitted value
            $ret[$ns][$directive] = null;
        } elseif (isset($array[$skey])) {
            $ret[$ns][$directive] = $mq ? stripslashes($array[$skey]) : $array[$skey];
        }
    }
    return $ret;
}
/**
 * Loads configuration values from an ini file
 *
 * The file is parsed with sections enabled and the result is handed to
 * loadArray(). If the file cannot be parsed, a warning is triggered and
 * the configuration is left untouched.
 *
 * @param $filename Name of ini file
 */
public function loadIni($filename) {
    if ($this->isFinalized('Cannot load directives after finalization')) return;
    $array = parse_ini_file($filename, true);
    if (!is_array($array)) {
        // parse_ini_file() yields false on failure; do not pass that on
        // to loadArray(), which iterates over its argument
        $this->triggerError('Cannot load configuration from ini file ' . htmlspecialchars($filename),
            E_USER_WARNING);
        return;
    }
    $this->loadArray($array);
}
/**
 * Checks whether or not the configuration object is finalized.
 *
 * @param $error String error message to trigger (as E_USER_ERROR) when
 *               the object is finalized, or false for no error
 * @return Bool whether the configuration is finalized
 */
public function isFinalized($error = false) {
    $finalized = $this->finalized;
    if ($finalized && $error) {
        $this->triggerError($error, E_USER_ERROR);
    }
    return $finalized;
}
/**
 * Finalizes configuration only if auto finalize is on and not
 * already finalized
 *
 * When auto finalize is off, the property list is squashed with its
 * force argument set to true instead.
 */
public function autoFinalize() {
    if (!$this->autoFinalize) {
        $this->plist->squash(true);
        return;
    }
    $this->finalize();
}
/**
 * Finalizes a configuration object, prohibiting further change
 *
 * @note The parser is set to null rather than unset(), because
 *       unsetting the property plays poorly with serialize.
 */
public function finalize() {
    $this->parser = null;
    $this->finalized = true;
}
/**
 * Produces a nicely formatted error message by supplying the
 * stack frame information OUTSIDE of HTMLPurifier_Config.
 *
 * When $this->chatty is off, the message is triggered unchanged.
 *
 * @param $msg String error message
 * @param $no  Error level to pass to trigger_error(), e.g. E_USER_WARNING
 */
protected function triggerError($msg, $no) {
    // determine previous stack frame
    $extra = '';
    if ($this->chatty) {
        $trace = debug_backtrace();
        // zip(tail(trace), trace) -- but PHP is not Haskell har har
        // Frame $i describes a call that was made from frame $i + 1, so
        // we want the first frame whose caller is not this class.  The
        // caller may be a plain function (no 'class' entry) or may not
        // exist at all (call made from the top-level script); both count
        // as "outside".
        for ($i = 0, $c = count($trace); $i < $c; $i++) {
            if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
                continue;
            }
            $frame = $trace[$i];
            // frames without file/line information add nothing useful
            if (isset($frame['line'], $frame['file'])) {
                $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
            }
            break;
        }
    }
    trigger_error($msg . $extra, $no);
}
/**
 * Returns a serialized form of the configuration object that can
 * be reconstituted.
 *
 * The HTML, CSS and URI definitions are retrieved first, in that order,
 * before the object itself is serialized.
 *
 * @return String result of serialize() on this object
 */
public function serialize() {
    foreach (array('HTML', 'CSS', 'URI') as $type) {
        $this->getDefinition($type);
    }
    return serialize($this);
}
709 // vim: et sw=4 sts=4