// Registers the Output.CommentScriptContents directive: a 'bool' that
// defaults to true. generateFromTokens() reads it to decide whether the
// contents of script elements get wrapped in comments.
HTMLPurifier_ConfigSchema::define(
    'Output', 'CommentScriptContents', true, 'bool',
    'Determines whether or not HTML Purifier should attempt to fix up '.
    'the contents of script tags for legacy browsers with comments. This '.
    'directive was available since 2.0.0.'
);
// Presumably keeps the older Core.CommentScriptContents name resolving to
// the Output directive -- confirm against ConfigSchema::defineAlias().
HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
11 // extension constraints could be factored into ConfigSchema
// Registers the Output.TidyFormat directive. By analogy with the
// CommentScriptContents registration, the arguments are presumably
// (namespace, directive, default, type, description): a 'bool' that
// defaults to false, with the description supplied as a heredoc.
// generateFromTokens() only acts on it when the tidy extension is loaded.
// NOTE(review): this listing appears to have lost lines inside the heredoc
// (the sentence ending "if Tidy is not" is cut short) as well as the
// heredoc terminator and the closing ");" -- restore from the pristine file.
12 HTMLPurifier_ConfigSchema
::define(
13 'Output', 'TidyFormat', false, 'bool', <<<HTML
15 Determines whether or not to run Tidy on the final output for pretty
16 formatting reasons, such as indentation and wrap.
19 This can greatly improve readability for editors who are hand-editing
20 the HTML, but is by no means necessary
as HTML Purifier has already
21 fixed all major errors the HTML may have had
. Tidy is a non
-default
22 extension
, and this directive will silently fail
if Tidy is not
26 If you are looking to make the overall look of your page
's source
27 better, I recommend running Tidy on the entire page rather than just
28 user-content (after all, the indentation relative to the containing
29 blocks will be incorrect).
32 This directive was available since 1.1.1.
// Presumably keeps the older Core.TidyFormat name resolving to
// Output.TidyFormat -- confirm against ConfigSchema::defineAlias().
// The namespace/directive literals must not contain line breaks; they are
// written here the same way as the CommentScriptContents alias.
HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
// Registers the Output.Newline directive with a null default and the type
// string 'string/null'. generateFromTokens() substitutes PHP_EOL when the
// value is null and then replaces every "\n" in the output with it.
// The description is a single-quoted literal, so the \r\n, \r and \n in the
// text stay as literal backslash sequences rather than control characters.
// NOTE(review): the literals below are fragmented across lines and the
// statement is never closed in this listing -- restore from the pristine
// file before relying on the exact description text.
38 HTMLPurifier_ConfigSchema::define('Output
', 'Newline
', null, 'string/null', '
40 Newline
string to format
final output with
. If left
null, HTML Purifier
41 will auto
-detect the
default newline type of the system
and use that
;
42 you can manually override it here
. Remember
, \r\n is Windows
, \r
43 is Mac
, and \n is Unix
. This directive was available since
2.0.1.
/**
 * Generates HTML from tokens.
 * @todo Refactor the interface so that configuration/context is determined
 *       upon instantiation, removing the need for messy
 *       generateFromTokens() calls
 * @todo Make some of the more internal functions protected, and have
 *       the unit tests work around that
 */
// Turns token objects back into an HTML string. The properties below cache
// per-call settings; generateFromTokens() overwrites all of them on entry.
54 class HTMLPurifier_Generator
58 * Bool cache of %HTML.XHTML
// Set from $this->_def->doctype->xml in generateFromTokens(). Read when
// closing empty tags and when filtering attributes.
61 private $_xhtml = true;
64 * Bool cache of %Output.CommentScriptContents
// Set from the Output.CommentScriptContents config value in
// generateFromTokens(). Enables the script special case there.
67 private $_scriptFix = false;
70 * Cache of HTMLDefinition
// NOTE(review): the property declaration that should follow this comment is
// not visible in this listing. $this->_def is assigned in
// generateFromTokens() and read in generateAttributes().
76 * Generates HTML from an array of tokens.
77 * @param $tokens Array of HTMLPurifier_Token
78 * @param $config HTMLPurifier_Config object
79 * @return Generated HTML
// Serializes a list of tokens into one HTML string, optionally reformats it
// with Tidy, and finally converts "\n" to the configured newline.
// $context is not referenced anywhere in the visible body.
// NOTE(review): the embedded line numbers skip in several places, so this
// listing has lost lines. Not visible here: the initialisation of $html
// (it is only ever appended to), the end of the $tidy_options array, the
// branch and construction that introduce $tidy, the closing braces, and
// the return. Compare with the pristine file before editing.
81 public function generateFromTokens($tokens, $config, $context) {
// A falsy $config is replaced by the library default configuration.
83 if (!$config) $config = HTMLPurifier_Config::createDefault();
84 $this->_scriptFix = $config->get('Output
', 'CommentScriptContents
');
// Cache the definition and its doctype's xml flag on the instance;
// generateFromToken() and generateAttributes() read them.
86 $this->_def = $config->getHTMLDefinition();
87 $this->_xhtml = $this->_def->doctype->xml;
// An empty (falsy) token list produces an empty string.
89 if (!$tokens) return '';
90 for ($i = 0, $size = count($tokens); $i < $size; $i++) {
// Special case: a token named 'script' whose second successor has type
// 'end'. The bounds check ensures $tokens[$i+2] exists.
// NOTE(review): ->name is read on every token while the script fix is on;
// presumably some token kinds (e.g. text) have no name -- confirm.
91 if ($this->_scriptFix && $tokens[$i]->name === 'script
'
92 && $i + 2 < $size && $tokens[$i+2]->type == 'end
') {
93 // script special case
94 // the contents of the script block must be ONE token
96 $html .= $this->generateFromToken($tokens[$i++]);
97 $html .= $this->generateScriptFromToken($tokens[$i++]);
98 // We're not going to
do this
: it wouldn
't be valid anyway
99 //while ($tokens[$i]->name != 'script
') {
100 // $html .= $this->generateScriptFromToken($tokens[$i++]);
// Emits the current token. After the special case above, $i has advanced
// twice, so this is the token that was checked to have type 'end'.
103 $html .= $this->generateFromToken($tokens[$i]);
// Tidy runs only when the directive is truthy AND the extension is loaded;
// otherwise this step is skipped without any error.
105 if ($config->get('Output
', 'TidyFormat
') && extension_loaded('tidy
')) {
107 $tidy_options = array(
109 'output
-xhtml
' => $this->_xhtml,
110 'show
-body
-only
' => true,
111 'indent
-spaces
' => 2,
// PHP versions below 5 use the procedural tidy_* functions.
114 if (version_compare(PHP_VERSION, '5', '<')) {
115 tidy_set_encoding('utf8
');
116 foreach ($tidy_options as $key => $value) {
117 tidy_setopt($key, $value);
119 tidy_parse_string($html);
121 $html = tidy_get_output();
// Object-style tidy calls. Presumably this is the else branch for PHP 5+,
// with $tidy constructed on a line that this listing dropped.
124 $tidy->parseString($html, $tidy_options, 'utf8
');
125 $tidy->cleanRepair();
126 $html = (string) $tidy;
129 // normalize newlines to system
130 $nl = $config->get('Output
', 'Newline
');
// A null setting means "use the platform newline". Only "\n" is replaced.
131 if ($nl === null) $nl = PHP_EOL;
132 $html = str_replace("\n", $nl, $html);
/**
 * Generates HTML from a single token.
 * @param $token HTMLPurifier_Token object.
 * @return Generated HTML; an empty string when the token has no type or
 *         a type other than start, end, empty or text.
 */
public function generateFromToken($token) {
    if (!isset($token->type)) return '';
    // switch compares loosely, the same way the == chain it replaces did.
    switch ($token->type) {
        case 'start':
            $attr = $this->generateAttributes($token->attr, $token->name);
            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';

        case 'end':
            return '</' . $token->name . '>';

        case 'empty':
            $attr = $this->generateAttributes($token->attr, $token->name);
            // XHTML output self-closes the tag with " /".
            // NOTE(review): the listing this was restored from had lost the
            // tail of this expression; '>' mirrors the start-tag branch.
            return '<' . $token->name . ($attr ? ' ' : '') . $attr .
                ($this->_xhtml ? ' /' : '') . '>';

        case 'text':
            return $this->escape($token->data);

        default:
            // Always hand back a string so callers can concatenate safely.
            return '';
    }
}
/**
 * Special case processor for the contents of script tags
 * @param $token Token holding the script contents
 * @return Text tokens come back wrapped in a comment/CDATA guard; any
 *         other token is rendered by generateFromToken()
 * @warning This runs into problems if there's already a literal
 *          --> somewhere inside the script contents.
 */
public function generateScriptFromToken($token) {
    if ($token->type == 'text') {
        // Wrapping technique from
        // <http://lachy.id.au/log/2005/05/script-comments>.
        // A trailing "//" (plus any whitespace after it) is removed first.
        $script = preg_replace('#//\s*$#', '', $token->data);
        $opener = '<!--//--><![CDATA[//><!--';
        $closer = '//--><!]]>';
        return $opener . "\n" . trim($script) . "\n" . $closer;
    }
    // Anything that is not text takes the ordinary path.
    return $this->generateFromToken($token);
}
180 * Generates attribute declarations from attribute array.
181 * @param $assoc_array_of_attributes Attribute array
182 * @return Generate HTML fragment for insertion.
// Builds the attribute portion of a tag from a name => value array.
// $element is the tag name; it is used to look up per-attribute
// information in the cached definition ($this->_def).
// NOTE(review): the embedded line numbers skip (184->186, 190->195), so
// this listing has lost lines. Not visible here: the initialisation of
// $html, the body of the "minimized" branch, the closing braces and the
// return. Compare with the pristine file before editing.
184 public function generateAttributes($assoc_array_of_attributes, $element) {
186 foreach ($assoc_array_of_attributes as $key => $value) {
// The checks in this branch apply only to non-XHTML output.
187 if (!$this->_xhtml
) {
188 // remove namespaced attributes
189 if (strpos($key, ':') !== false) continue;
// True when the definition marks this attribute of this element as
// minimized. What is emitted in that case is not visible in this listing.
190 if (!empty($this->_def
->info
[$element]->attr
[$key]->minimized
)) {
// Default form: name="escaped value" followed by a single space.
195 $html .= $key.'="'.$this->escape($value).'" ';
/**
 * Escapes raw text data.
 * @param $string String data to escape for HTML.
 * @param $quote  Optional quote-style flag passed to htmlspecialchars().
 *                Defaults to ENT_COMPAT, which escapes double quotes but
 *                leaves single quotes alone; that is sufficient for the
 *                double-quoted attributes generateAttributes() writes.
 * @return String escaped data.
 */
public function escape($string, $quote = ENT_COMPAT) {
    return htmlspecialchars($string, $quote, 'UTF-8');
}