From 9573f0933de2ae389a7410026fcdc8e30cd1b81f Mon Sep 17 00:00:00 2001 From: Tomasz Muras Date: Fri, 10 Sep 2010 21:51:55 +0100 Subject: [PATCH] Make newline normalization optional. --- configdoc/usage.xml | 13 +++++++++---- library/HTMLPurifier.includes.php | 1 + library/HTMLPurifier.safe-includes.php | 1 + library/HTMLPurifier/ConfigSchema/schema.ser | Bin 13599 -> 13710 bytes .../schema/HTML.NewlineNormalization.txt | 9 +++++++++ library/HTMLPurifier/Lexer.php | 6 ++++-- library/HTMLPurifier/Lexer/PH5P.php | 2 -- tests/HTMLPurifier/LexerTest.php | 18 ++++++++++++++++++ 8 files changed, 42 insertions(+), 8 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (77%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/HTML.NewlineNormalization.txt diff --git a/configdoc/usage.xml b/configdoc/usage.xml index a8c465db..b0944479 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -6,7 +6,7 @@ 81 - 282 + 284 53 @@ -154,7 +154,7 @@ 202 - 269 + 271 27 @@ -214,14 +214,19 @@ 48 + + + 266 + + - 280 + 282 - 301 + 303 diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 2de7f190..260d82d8 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -201,6 +201,7 @@ require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/Munge.php'; require 'HTMLPurifier/URIScheme/data.php'; +require 'HTMLPurifier/URIScheme/file.php'; require 'HTMLPurifier/URIScheme/ftp.php'; require 'HTMLPurifier/URIScheme/http.php'; require 'HTMLPurifier/URIScheme/https.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index 630daaa1..ec68b498 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -195,6 +195,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . 
'/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; require_once $__dir . '/HTMLPurifier/URIScheme/data.php'; +require_once $__dir . '/HTMLPurifier/URIScheme/file.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/http.php'; require_once $__dir . '/HTMLPurifier/URIScheme/https.php'; diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 77% index 2d7cfa1ac4a68508920c6b1c5c12bc9b864d5188..528bd0de0ec7e766f2c923f2a55b7d01c6713e6e 100644 GIT binary patch delta 161 zcwU>I)t5cNoY7?RZ3Wqlp+Y=LMy6Iu9wELydVZe)&bYi8+~7i6xo&c}mtv qR)(7o%DiBn>?7bosxC&3v*?nI{(tcp&& + Whether or not to normalize newlines. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index f853421a..db14ebac 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -263,8 +263,10 @@ class HTMLPurifier_Lexer public function normalize($html, $config, $context) { // normalize newlines to \n - $html = str_replace("\r\n", "\n", $html); - $html = str_replace("\r", "\n", $html); + if ($config->get('HTML.NewlineNormalization')) { + $html = str_replace("\r\n", "\n", $html); + $html = str_replace("\r", "\n", $html); + } if ($config->get('HTML.Trusted')) { // escape convoluted CDATA diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php index fa1bf973..faf00b82 100644 --- a/library/HTMLPurifier/Lexer/PH5P.php +++ b/library/HTMLPurifier/Lexer/PH5P.php @@ -125,8 +125,6 @@ class HTML5 { const EOF = 5; public function __construct($data) { - $data = str_replace("\r\n", "\n", $data); - $data = str_replace("\r", null, $data); $this->data = $data; $this->char = -1; diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index 0cb95155..79d1cf87 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -725,6 +725,25 @@ div {} ); } + function test_tokenizeHTML_removeNewline() { + $this->config->set('HTML.NewlineNormalization', true); + $input = "plain text\r\n"; + $expect = array( + new HTMLPurifier_Token_Text("plain text\n") + ); + $this->assertTokenization($input, $expect); + } + + function test_tokenizeHTML_noRemoveNewline() { + $this->config->set('HTML.NewlineNormalization', false); + $input = "plain text\r\n"; + $expect = array( + new HTMLPurifier_Token_Text("plain text\r\n") + ); + $this->assertTokenization($input, $expect); + } + + /* function test_tokenizeHTML_() { -- 2.11.4.GIT