From c4aa3ee40ceeae5dcaca9abd46cbf9fc3eb7db36 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 22 Apr 2008 18:14:40 +0000 Subject: [PATCH] [3.1.0] Encoder optimization, as suggested by Diego git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1680 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 ++ TODO | 9 ++++++--- library/HTMLPurifier/Encoder.php | 16 +++++++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index d21ff98d..eff400e9 100644 --- a/NEWS +++ b/NEWS @@ -12,7 +12,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 3.1.0, unknown release date - InterchangeBuilder now alphabetizes its lists - Validation error in configdoc output fixed +- Iconv errors muted even with custom error handlers . Out-of-date documentation revised +. UTF-8 encoding check optimization as suggested by Diego 3.1.0rc1, released 2008-04-22 # Autoload support added. Internal require_once's removed in favor of an diff --git a/TODO b/TODO index 94a3a4ab..a775cc4a 100644 --- a/TODO +++ b/TODO @@ -29,11 +29,14 @@ NICE FEATURES BUGS - Style attribute height/width limiting for images - - Easy way to blacklist elements and attributes - - Investigate iconv error emitting - - Investigate UTF-8 optimization - Figure out what to do about target="" and name="", since they show up so often +EXTERNAL + - Improve Phorum install documentation + - Mia + - Aliro + - Comparison: http://code.iamcal.com/php/lib_filter/ + FUTURE VERSIONS --------------- diff --git a/library/HTMLPurifier/Encoder.php b/library/HTMLPurifier/Encoder.php index 5e7a98c8..5ace301f 100644 --- a/library/HTMLPurifier/Encoder.php +++ b/library/HTMLPurifier/Encoder.php @@ -15,6 +15,11 @@ class HTMLPurifier_Encoder } /** + * Error-handler that mutes errors, alternative to shut-up operator. + */ + private static function muteErrorHandler() {} + + /** * Cleans a UTF-8 string for well-formedness and SGML validity * * It will parse according to UTF-8 and return a valid UTF8 string, with @@ -57,9 +62,18 @@ class HTMLPurifier_Encoder static $iconv = null; if ($iconv === null) $iconv = function_exists('iconv'); + // UTF-8 validity is checked since PHP 4.3.5 + // This is an optimization: if the string is already valid UTF-8, no + // need to do iconv/php stuff. 99% of the time, this will be the case. + if (preg_match('/^.{1}/us', $str)) { + return strtr($str, $non_sgml_chars); + } + if ($iconv && !$force_php) { // do the shortcut way - $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + $str = iconv('UTF-8', 'UTF-8//IGNORE', $str); + restore_error_handler(); return strtr($str, $non_sgml_chars); } -- 2.11.4.GIT