From 4164b2eb2b134d0d27cbce718b53ff9a442fd498 Mon Sep 17 00:00:00 2001 From: "Bradley M. Froehle" Date: Sun, 13 Feb 2011 17:47:01 -0800 Subject: [PATCH] Implement Iframe module, and provide %HTML.SafeIframe and %URI.SafeIframeRegexp for untrusted usage. The purpose of this addition is twofold. In trusted mode, iframes are now unconditionally allowed. However, many online video providers (YouTube, Vimeo) and other web applications (Google Maps, Google Calendar, etc) provide embed code in iframe format, which is useful functionality in untrusted mode. You can specify iframes as trusted elements with %HTML.SafeIframe; however, you need to additionally specify a whitelist mechanism such as %URI.SafeIframeRegexp to say what iframe embeds are OK (by default everything is rejected). Note: As iframes are invalid in strict doctypes, you will not be able to use them there. We also added an always_load parameter to URIFilters in order to support the strange nature of the SafeIframe URIFilter (it always needs to be loaded, due to the inability of accessing the %HTML.SafeIframe directive to see if it's needed!) We expect this URIFilter can expand in the future to offer more complex validation mechanisms. Signed-off-by: Bradley M. Froehle Signed-off-by: Edward Z. 
Yang --- NEWS | 3 ++ configdoc/usage.xml | 37 +++++++++++++------- library/HTMLPurifier.includes.php | 2 ++ library/HTMLPurifier.safe-includes.php | 2 ++ library/HTMLPurifier/AttrDef/URI.php | 2 +- library/HTMLPurifier/ConfigSchema/schema.ser | Bin 14435 -> 14609 bytes .../ConfigSchema/schema/HTML.SafeIframe.txt | 13 +++++++ .../ConfigSchema/schema/URI.SafeIframeRegexp.txt | 22 ++++++++++++ library/HTMLPurifier/HTMLModule/Forms.php | 2 +- library/HTMLPurifier/HTMLModule/Iframe.php | 38 +++++++++++++++++++++ library/HTMLPurifier/HTMLModuleManager.php | 8 +++-- library/HTMLPurifier/URIDefinition.php | 9 +++-- library/HTMLPurifier/URIFilter.php | 14 ++++++-- library/HTMLPurifier/URIFilter/SafeIframe.php | 35 +++++++++++++++++++ .../HTMLT/safe-iframe-googlemaps.htmlt | 8 +++++ tests/HTMLPurifier/HTMLT/safe-iframe-youtube.htmlt | 8 +++++ tests/HTMLPurifier/HTMLT/safe-iframe.htmlt | 14 ++++++++ 17 files changed, 196 insertions(+), 21 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (76%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt create mode 100644 library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt create mode 100644 library/HTMLPurifier/HTMLModule/Iframe.php create mode 100644 library/HTMLPurifier/URIFilter/SafeIframe.php create mode 100644 tests/HTMLPurifier/HTMLT/safe-iframe-googlemaps.htmlt create mode 100644 tests/HTMLPurifier/HTMLT/safe-iframe-youtube.htmlt create mode 100644 tests/HTMLPurifier/HTMLT/safe-iframe.htmlt diff --git a/NEWS b/NEWS index 992c2304..0088fb69 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier a standards compliant way, by moving them into the preceding
  • ! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for limited allowed comments in untrusted situations. +! Implement iframes, and allow them to be used in untrusted mode with + %HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle + for submitting an initial version of the patch. - Color keywords are now case insensitive. Thanks Yzmir Ramirez for reporting. - Explicitly initialize anonModule variable to null. diff --git a/configdoc/usage.xml b/configdoc/usage.xml index 17df6357..8e4a7d3c 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -169,7 +169,7 @@ - 202 + 204 271 @@ -186,37 +186,37 @@ - 209 + 211 - 210 + 212 - 220 + 222 - 223 + 225 - 226 + 228 - 229 + 231 - 232 + 234 @@ -254,7 +254,7 @@ - 55 + 59 12 @@ -262,7 +262,7 @@ - 64 + 69 81 @@ -270,12 +270,12 @@ - 65 + 70 - 72 + 77 @@ -419,6 +419,14 @@ 123 + + + 17 + + + 23 + + 14 @@ -513,4 +521,9 @@ 15 + + + 18 + + diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index e44ab1b3..ad9b604e 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -150,6 +150,7 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php'; require 'HTMLPurifier/HTMLModule/Edit.php'; require 'HTMLPurifier/HTMLModule/Forms.php'; require 'HTMLPurifier/HTMLModule/Hypertext.php'; +require 'HTMLPurifier/HTMLModule/Iframe.php'; require 'HTMLPurifier/HTMLModule/Image.php'; require 'HTMLPurifier/HTMLModule/Legacy.php'; require 'HTMLPurifier/HTMLModule/List.php'; @@ -205,6 +206,7 @@ require 'HTMLPurifier/URIFilter/DisableResources.php'; require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/Munge.php'; +require 'HTMLPurifier/URIFilter/SafeIframe.php'; require 'HTMLPurifier/URIScheme/data.php'; require 'HTMLPurifier/URIScheme/file.php'; require 'HTMLPurifier/URIScheme/ftp.php'; diff --git a/library/HTMLPurifier.safe-includes.php 
b/library/HTMLPurifier.safe-includes.php index d2a11792..75d9fd85 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -144,6 +144,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php'; +require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/List.php'; @@ -199,6 +200,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php'; require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; +require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php'; require_once $__dir . '/HTMLPurifier/URIScheme/data.php'; require_once $__dir . '/HTMLPurifier/URIScheme/file.php'; require_once $__dir . 
'/HTMLPurifier/URIScheme/ftp.php'; diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 01a6d83e..c2b68467 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef } public function make($string) { - $embeds = (bool) $string; + $embeds = ($string === 'embedded'); return new HTMLPurifier_AttrDef_URI($embeds); } diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 76% index aedf3d2b47a4e8e58a88f325bf8bba72fde1472c..bc9227ec4d77fc095664c3c3be94deb8dc0e7180 100644 GIT binary patch delta 169 zcwT#4FtKQYIitzu)q+{PY@TUFiMgqpP5{D+-jX s{U*l=`jD*WqLL}2`Q+7tvYSW2Tj^p>=I704d5jIsgCw delta 63 zcwU=W^tfPxIiu0!N@dxNp}xGE_ww##o;*X)8(qjn*@V$_bEPt`z~sHW9Gh1fyRtIX G)&c + Whether or not to permit iframe tags in untrusted documents. This + directive must be accompanied by a whitelist of permitted iframes, + such as %URI.SafeIframeRegexp, otherwise it will fatally error. + This directive has no effect on strict doctypes, as iframes are not + valid. +

    +--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt b/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt new file mode 100644 index 00000000..cdcc2f5d --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt @@ -0,0 +1,22 @@ +URI.SafeIframeRegexp +TYPE: string/null +VERSION: 4.3.1 +DEFAULT: NULL +--DESCRIPTION-- +

    + A PCRE regular expression that will be matched against an iframe URI. This is + a relatively inflexible scheme, but works well enough for the most common + use-case of iframes: embedded video. This directive only has an effect if + %HTML.SafeIframe is enabled. Here are some example values: +

    +
      +
    • %^http://www\.youtube\.com/embed/% - Allow YouTube videos
    • +
    • %^http://player\.vimeo\.com/video/% - Allow Vimeo videos
    • +
    • %^http://(www\.youtube\.com/embed/|player\.vimeo\.com/video/)% - Allow both
    • +
    +

    + Note that this directive does not give you enough granularity to, say, disable + all autoplay videos. Pipe up on the HTML Purifier forums if this + is a capability you want. +

    +--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Forms.php b/library/HTMLPurifier/HTMLModule/Forms.php index 44c22f6f..139df2d7 100644 --- a/library/HTMLPurifier/HTMLModule/Forms.php +++ b/library/HTMLPurifier/HTMLModule/Forms.php @@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule 'name' => 'CDATA', 'readonly' => 'Bool#readonly', 'size' => 'Number', - 'src' => 'URI#embeds', + 'src' => 'URI#embedded', 'tabindex' => 'Number', 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', 'value' => 'CDATA', diff --git a/library/HTMLPurifier/HTMLModule/Iframe.php b/library/HTMLPurifier/HTMLModule/Iframe.php new file mode 100644 index 00000000..287071ed --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Iframe.php @@ -0,0 +1,38 @@ +get('HTML.SafeIframe')) { + $this->safe = true; + } + $this->addElement( + 'iframe', 'Inline', 'Flow', 'Common', + array( + 'src' => 'URI#embedded', + 'width' => 'Length', + 'height' => 'Length', + 'name' => 'ID', + 'scrolling' => 'Enum#yes,no,auto', + 'frameborder' => 'Enum#0,1', + 'longdesc' => 'URI', + 'marginheight' => 'Pixels', + 'marginwidth' => 'Pixels', + ) + ); + } + +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index 92a05705..468cf46f 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute', // Unsafe: - 'Scripting', 'Object', 'Forms', + 'Scripting', 'Object', 'Forms', // Sorta legacy, but present in strict: 'Name', ); - $transitional = array('Legacy', 'Target'); + $transitional = array('Legacy', 'Target', 'Iframe'); $xml = array('XMLCommonAttributes'); $non_xml = array('NonXMLCommonAttributes'); @@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager $this->doctypes->register( 'XHTML 1.1', true, - 
array_merge($common, $xml, array('Ruby')), + // Iframe is a real XHTML 1.1 module, despite being + // "transitional"! + array_merge($common, $xml, array('Ruby', 'Iframe')), array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 array(), '-//W3C//DTD XHTML 1.1//EN', diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php index ded5b9b6..40e57bb7 100644 --- a/library/HTMLPurifier/URIDefinition.php +++ b/library/HTMLPurifier/URIDefinition.php @@ -27,6 +27,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); + $this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe()); $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute()); $this->registerFilter(new HTMLPurifier_URIFilter_Munge()); } @@ -52,9 +53,13 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition protected function setupFilters($config) { foreach ($this->registeredFilters as $name => $filter) { - $conf = $config->get('URI.' . $name); - if ($conf !== false && $conf !== null) { + if ($filter->always_load) { $this->addFilter($filter, $config); + } else { + $conf = $config->get('URI.' . $name); + if ($conf !== false && $conf !== null) { + $this->addFilter($filter, $config); + } } } unset($this->registeredFilters); diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php index 5d54c158..6a1b0b08 100644 --- a/library/HTMLPurifier/URIFilter.php +++ b/library/HTMLPurifier/URIFilter.php @@ -4,7 +4,9 @@ * Chainable filters for custom URI processing. * * These filters can perform custom actions on a URI filter object, - * including transformation or blacklisting. + * including transformation or blacklisting. 
A filter named Foo + * must have a corresponding configuration directive %URI.Foo, + * unless always_load is specified to be true. * * The following contexts may be available while URIFilters are being * processed: @@ -37,7 +39,15 @@ abstract class HTMLPurifier_URIFilter public $post = false; /** - * Performs initialization for the filter + * True if this filter should always be loaded (this permits + * a filter to be named Foo without the corresponding %URI.Foo + * directive existing.) + */ + public $always_load = false; + + /** + * Performs initialization for the filter. If the filter returns + * false, this means that it shouldn't be considered active. */ public function prepare($config) {return true;} diff --git a/library/HTMLPurifier/URIFilter/SafeIframe.php b/library/HTMLPurifier/URIFilter/SafeIframe.php new file mode 100644 index 00000000..284bb13d --- /dev/null +++ b/library/HTMLPurifier/URIFilter/SafeIframe.php @@ -0,0 +1,35 @@ +regexp = $config->get('URI.SafeIframeRegexp'); + return true; + } + public function filter(&$uri, $config, $context) { + // check if filter not applicable + if (!$config->get('HTML.SafeIframe')) return true; + // check if the filter should actually trigger + if (!$context->get('EmbeddedURI', true)) return true; + $token = $context->get('CurrentToken', true); + if (!($token && $token->name == 'iframe')) return true; + // check if we actually have some whitelists enabled + if ($this->regexp === null) return false; + // actually check the whitelists + return preg_match($this->regexp, $uri->toString()); + } +} + +// vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/HTMLT/safe-iframe-googlemaps.htmlt b/tests/HTMLPurifier/HTMLT/safe-iframe-googlemaps.htmlt new file mode 100644 index 00000000..40fac62d --- /dev/null +++ b/tests/HTMLPurifier/HTMLT/safe-iframe-googlemaps.htmlt @@ -0,0 +1,8 @@ +--INI-- +HTML.SafeIframe = true +URI.SafeIframeRegexp = "%^http://maps.google.com/%" +--HTML-- + +--EXPECT-- + +--# vim: et sw=4 sts=4 diff --git 
a/tests/HTMLPurifier/HTMLT/safe-iframe-youtube.htmlt b/tests/HTMLPurifier/HTMLT/safe-iframe-youtube.htmlt new file mode 100644 index 00000000..1abc2c82 --- /dev/null +++ b/tests/HTMLPurifier/HTMLT/safe-iframe-youtube.htmlt @@ -0,0 +1,8 @@ +--INI-- +HTML.SafeIframe = true +URI.SafeIframeRegexp = "%^http://www.youtube.com/embed/%" +--HTML-- + +--EXPECT-- + +--# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/HTMLT/safe-iframe.htmlt b/tests/HTMLPurifier/HTMLT/safe-iframe.htmlt new file mode 100644 index 00000000..7c0b60d2 --- /dev/null +++ b/tests/HTMLPurifier/HTMLT/safe-iframe.htmlt @@ -0,0 +1,14 @@ +--INI-- +HTML.SafeIframe = true +URI.SafeIframeRegexp = "%(^http://www.example.com/|^https?://dev.example.com/)%" +--HTML-- + + + + +--EXPECT-- + + + + +--# vim: et sw=4 sts=4 -- 2.11.4.GIT