Add note fall through is intentional.
[htmlpurifier.git] / library / HTMLPurifier.php
blob02fb0902b66d0d7b71f0194d9f19392e2b5a3756
1 <?php
3 /*! @mainpage
5 * HTML Purifier is an HTML filter that will take an arbitrary snippet of
6 * HTML and rigorously test, validate and filter it into a version that
7 * is safe for output onto webpages. It achieves this by:
9 * -# Lexing (parsing into tokens) the document,
10 * -# Executing various strategies on the tokens:
11 * -# Removing all elements not in the whitelist,
12 * -# Making the tokens well-formed,
13 * -# Fixing the nesting of the nodes, and
14 * -# Validating attributes of the nodes; and
15 * -# Generating HTML from the purified tokens.
17 * However, most users will only need to interface with the HTMLPurifier
18 * and HTMLPurifier_Config classes.
22 HTML Purifier 4.5.0 - Standards Compliant HTML Filtering
23 Copyright (C) 2006-2008 Edward Z. Yang
25 This library is free software; you can redistribute it and/or
26 modify it under the terms of the GNU Lesser General Public
27 License as published by the Free Software Foundation; either
28 version 2.1 of the License, or (at your option) any later version.
30 This library is distributed in the hope that it will be useful,
31 but WITHOUT ANY WARRANTY; without even the implied warranty of
32 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
33 Lesser General Public License for more details.
35 You should have received a copy of the GNU Lesser General Public
36 License along with this library; if not, write to the Free Software
37 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/**
 * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 *
 * @note There are several points in which configuration can be specified
 *       for HTML Purifier.  The precedence of these (from lowest to
 *       highest) is as follows:
 *          -# Instance: new HTMLPurifier($config)
 *          -# Invocation: purify($html, $config)
 *       These configurations are entirely independent of each other and
 *       are *not* merged (this behavior may change in the future).
 *
 * @todo We need an easier way to inject strategies using the configuration
 *       object.
 */
class HTMLPurifier
{

    /**
     * @var string Version of HTML Purifier
     */
    public $version = '4.5.0';

    /**
     * Constant with version of HTML Purifier
     */
    const VERSION = '4.5.0';

    /**
     * @var HTMLPurifier_Config Global configuration object, used by purify()
     *      whenever no per-call configuration is passed
     */
    public $config;

    /**
     * @var HTMLPurifier_Filter[] Array of extra filter objects to run on HTML,
     *      for backwards compatibility (populated through addFilter())
     */
    private $filters = array();

    /**
     * @var HTMLPurifier Single instance of HTML Purifier, managed by instance()
     */
    private static $instance;

    /**
     * @var HTMLPurifier_Strategy_Core Strategy that turns the lexed tokens
     *      into purified tokens; created once in the constructor
     */
    protected $strategy;

    /**
     * @var HTMLPurifier_Generator Generator that serializes purified tokens
     *      back to HTML; re-created on every purify() call
     */
    protected $generator;

    /**
     * @var HTMLPurifier_Context Resultant context of last run purification.
     *      Is an array of contexts if the last called method was
     *      purifyArray(); for nested input arrays the array of contexts is
     *      nested the same way.
     */
    public $context;

    /**
     * Initializes the purifier.
     *
     * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
     *                for all instances of the purifier, if omitted, a default
     *                configuration is supplied (which can be overridden on a
     *                per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null)
    {
        $this->config = HTMLPurifier_Config::create($config);

        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve
     *
     * Deprecated: every call raises an E_USER_WARNING before the filter is
     * queued. Filters queued here run after the ones selected through the
     * configuration.
     *
     * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter)
    {
        trigger_error(
            'HTMLPurifier->addFilter() is deprecated, use configuration directives'.
            ' in the Filter namespace or Filter.Custom',
            E_USER_WARNING
        );
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param string $html String of HTML to purify
     * @param HTMLPurifier_Config $config Config object for this operation,
     *                if omitted, defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     *
     * @return string Purified HTML
     */
    public function purify($html, $config = null)
    {
        // :TODO: make the config merge in, instead of replace
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);

            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
        $context->register('IDAccumulator', $id_accumulator);

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: every truthy flag in the Filter namespace names a
        // HTMLPurifier_Filter_* class; 'Custom' holds ready-made filter objects
        $filter_flags = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);
        $filters = array();
        foreach ($filter_flags as $filter => $flag) {
            if (!$flag) {
                continue;
            }
            // dotted keys are skipped; they do not map to a filter class name
            if (strpos($filter, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$filter";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $filter) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $filter;
        }
        // filters registered through the deprecated addFilter() go last
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
            $html = $filters[$i]->preFilter($html, $config, $context);
        }

        // purified HTML
        $html =
            $this->generator->generateFromTokens(
                // list of tokens
                $this->strategy->execute(
                    // list of un-purified tokens
                    $lexer->tokenizeHTML(
                        // un-purified HTML
                        $html,
                        $config,
                        $context
                    ),
                    $config,
                    $context
                )
            );

        // post-filters run in the reverse order of the pre-filters
        for ($i = $filter_size - 1; $i >= 0; $i--) {
            $html = $filters[$i]->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // reference assignment kept as in the original implementation
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets
     *
     * Elements that are themselves arrays are purified recursively, so
     * nested arrays of snippets keep their shape and keys in the result.
     *
     * @param array $array_of_html Array of html snippets; elements may be
     *                strings or (nested) arrays of snippets
     * @param HTMLPurifier_Config $config Optional config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     *
     * @return array Array of purified HTML, with the keys of the input
     */
    public function purifyArray($array_of_html, $config = null)
    {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            if (is_array($html)) {
                // nested array: recurse instead of handing an array to purify()
                $array_of_html[$key] = $this->purifyArray($html, $config);
            } else {
                $array_of_html[$key] = $this->purify($html, $config);
            }
            // purify()/purifyArray() just stored the context(s) of this element
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * A truthy $prototype always replaces the currently stored instance.
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     */
    public static function instance($prototype = null)
    {
        if (!self::$instance || $prototype) {
            if ($prototype instanceof HTMLPurifier) {
                self::$instance = $prototype;
            } elseif ($prototype) {
                self::$instance = new HTMLPurifier($prototype);
            } else {
                self::$instance = new HTMLPurifier();
            }
        }
        return self::$instance;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     *
     * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
     *                   HTMLPurifier instance to overload singleton with,
     *                   or HTMLPurifier_Config instance to configure the
     *                   generated version with.
     *
     * @return HTMLPurifier
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null)
    {
        return self::instance($prototype);
    }

}
271 // vim: et sw=4 sts=4