From 9b10515fa44a89caba496bcafbf45b91704b32c2 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 25 Dec 2011 03:32:25 -0500 Subject: [PATCH] Core.EscapeNonASCIICharacters now always works, even if target is UTF-8. Signed-off-by: Edward Z. Yang --- NEWS | 2 ++ library/HTMLPurifier/Encoder.php | 6 +++--- tests/HTMLPurifier/EncoderTest.php | 8 ++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 4df76546..ed51c529 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 4.3.1, unknown release date # URI.Munge now munges URIs inside the same host that go from https to http. Reported by Neike Taika-Tessaro. +# Core.EscapeNonASCIICharacters now always transforms non-ASCII + characters to entities, even if target encoding is UTF-8. ! Added support for 'scope' attribute on tables. - Color keywords are now case insensitive. Thanks Yzmir Ramirez for reporting. diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index 890c890e..9fa76bd1 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -365,12 +365,12 @@ class HTMLPurifier_Encoder */ public static function convertFromUTF8($str, $config, $context) { $encoding = $config->get('Core.Encoding'); - if ($encoding === 'utf-8') return $str; - static $iconv = null; - if ($iconv === null) $iconv = self::iconvAvailable(); if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { $str = self::convertToASCIIDumbLossless($str); } + if ($encoding === 'utf-8') return $str; + static $iconv = null; + if ($iconv === null) $iconv = self::iconvAvailable(); if ($iconv && !$config->get('Test.ForceNoIconv')) { // Undo our previous fix in convertToUTF8, otherwise iconv will barf $ascii_fix = self::testEncodingSupportsASCII($encoding); diff --git a/tests/HTMLPurifier/EncoderTest.php b/tests/HTMLPurifier/EncoderTest.php index 944ace9b..6084c39f 100644 --- a/tests/HTMLPurifier/EncoderTest.php +++ 
b/tests/HTMLPurifier/EncoderTest.php @@ -123,7 +123,15 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context), "中文 (Chinese)" ); + } + function test_convertFromUTF8_withProtectionButUtf8() { + // Escaping must not depend on the target encoding: with Core.Encoding left untouched (the UTF-8 case), non-ASCII characters still come back as numeric entities. + $this->config->set('Core.EscapeNonASCIICharacters', true); + $this->assertIdentical( + HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context), + "&#20013;&#25991; (Chinese)" + ); } function test_convertToASCIIDumbLossless() { -- 2.11.4.GIT