Release 1.4.1, merged in 685-687.
[htmlpurifier.git] / library / HTMLPurifier.php
blobcf11b50a430cc4bc7a5ba13e542df7078b69172f
1 <?php
3 /*!
4 * @mainpage
5 *
6 * HTML Purifier is an HTML filter that will take an arbitrary snippet of
7 * HTML and rigorously test, validate and filter it into a version that
8 * is safe for output onto webpages. It achieves this by:
9 *
10 * -# Lexing (parsing into tokens) the document,
11 * -# Executing various strategies on the tokens:
12 * -# Removing all elements not in the whitelist,
13 * -# Making the tokens well-formed,
14 * -# Fixing the nesting of the nodes, and
15 * -# Validating attributes of the nodes; and
16 * -# Generating HTML from the purified tokens.
18 * However, most users will only need to interface with the HTMLPurifier
19 * class, so this massive amount of infrastructure is usually concealed.
20 * If you plan on working with the internals, be sure to include
21 * HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
25 HTML Purifier 1.4.1 - Standards Compliant HTML Filtering
26 Copyright (C) 2006 Edward Z. Yang
28 This library is free software; you can redistribute it and/or
29 modify it under the terms of the GNU Lesser General Public
30 License as published by the Free Software Foundation; either
31 version 2.1 of the License, or (at your option) any later version.
33 This library is distributed in the hope that it will be useful,
34 but WITHOUT ANY WARRANTY; without even the implied warranty of
35 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 Lesser General Public License for more details.
38 You should have received a copy of the GNU Lesser General Public
39 License along with this library; if not, write to the Free Software
40 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
43 // almost every class has an undocumented dependency on these, so make sure
44 // they get included
45 require_once 'HTMLPurifier/ConfigSchema.php';
46 require_once 'HTMLPurifier/Config.php';
47 require_once 'HTMLPurifier/Context.php';
49 require_once 'HTMLPurifier/Lexer.php';
50 require_once 'HTMLPurifier/Generator.php';
51 require_once 'HTMLPurifier/Strategy/Core.php';
52 require_once 'HTMLPurifier/Encoder.php';
/**
 * Main library execution class.
 *
 * Facade that performs calls to the HTMLPurifier_Lexer,
 * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
 * purify HTML.
 *
 * @todo We need an easier way to inject strategies, it'll probably end
 *       up getting done through config though.
 */
class HTMLPurifier
{

    /**
     * Version string of this release of the library.
     */
    var $version = '1.4.1';

    /**
     * Default configuration, as returned by HTMLPurifier_Config::create()
     * in the constructor. Used by purify() when no per-call config is given.
     */
    var $config;

    /**
     * Filters registered through addFilter(), in registration order.
     * Initialised to an empty array so that purify() can count() and index
     * it even when no filter was ever added (count(null) warns on
     * PHP 7.2+ and throws a TypeError on PHP 8).
     */
    var $filters = array();

    /**
     * Collaborators created by the constructor: the lexer turns HTML into
     * tokens, the strategy transforms the token list, and the generator
     * turns tokens back into HTML.
     */
    var $lexer, $strategy, $generator;

    /**
     * Final HTMLPurifier_Context of the most recent purification run.
     * After purifyArray() this is an array of contexts instead, keyed like
     * the input array.
     * @public
     */
    var $context;

    /**
     * Initializes the purifier.
     *
     * Declared before the PHP 4-style constructor below on purpose: PHP 5.0
     * to 5.3 raise an E_STRICT "Redefining already defined constructor"
     * notice when __construct() follows a class-named method.
     *
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier, if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    function __construct($config = null) {

        $this->config = HTMLPurifier_Config::create($config);

        $this->lexer     = HTMLPurifier_Lexer::create();
        $this->strategy  = new HTMLPurifier_Strategy_Core();
        $this->generator = new HTMLPurifier_Generator();

    }

    /**
     * PHP 4-style constructor, kept so that PHP 4 (which does not know
     * __construct()) and callers using parent::HTMLPurifier() keep working.
     * PHP 8 no longer treats a class-named method as a constructor, which is
     * why the real initialisation lives in __construct().
     *
     * @param $config See __construct().
     */
    function HTMLPurifier($config = null) {
        $this->__construct($config);
    }

    /**
     * Adds a filter to process the output. Filters are applied first come,
     * first served: preFilter() runs in registration order and postFilter()
     * in reverse registration order (see purify()).
     * @param $filter HTMLPurifier_Filter object
     */
    function addFilter($filter) {
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param $html String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation, if omitted,
     *                defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    function purify($html, $config = null) {

        // a falsy $config (null, empty array, ...) selects the instance default
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

        // every run gets a fresh context; it is published on $this->context
        // once the run has finished
        $context = new HTMLPurifier_Context();
        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // pre-filters run in the order the filters were added
        $filter_count = count($this->filters);
        for ($i = 0; $i < $filter_count; $i++) {
            $html = $this->filters[$i]->preFilter($html, $config, $context);
        }

        // un-purified HTML -> list of un-purified tokens
        $tokens = $this->lexer->tokenizeHTML($html, $config, $context);
        // list of un-purified tokens -> list of purified tokens
        $tokens = $this->strategy->execute($tokens, $config, $context);
        // list of purified tokens -> purified HTML
        $html = $this->generator->generateFromTokens($tokens, $config, $context);

        // post-filters run in reverse order, mirroring the pre-filter pass
        for ($i = $filter_count - 1; $i >= 0; $i--) {
            $html = $this->filters[$i]->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets
     * @param $array_of_html Array of HTML strings to purify; keys are
     *                preserved in the returned array.
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML
     */
    function purifyArray($array_of_html, $config = null) {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            $array_of_html[$key] = $this->purify($html, $config);
            // purify() has just stored this run's context on $this->context
            $context_array[$key] = $this->context;
        }
        // expose one context per snippet, under the same keys as the input
        $this->context = $context_array;
        return $array_of_html;
    }

}